author     Sunil Goutham <sgoutham@cavium.com>      2015-05-26 19:20:15 -0700
committer  David S. Miller <davem@davemloft.net>    2015-05-27 14:19:44 -0400
commit     4863dea3fab01734768c9f7fc2c1590a8f1f6266 (patch)
tree       02debc8fb06d2d5f94b9c71eb9bc093a75f86e76 /drivers
parent     e5c4708b2b6fec0300db60ee3cf4b4ae96430a12 (diff)
net: Adding support for Cavium ThunderX network controller
This patch adds support for the Cavium ThunderX network controller.
The driver sits on the PCI bus and therefore requires the Thunder PCIe host
controller driver to be enabled.
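
For reference, a minimal sketch of the kernel configuration needed to build all
three parts of the driver, using the option names introduced by the Kconfig
entries added below (the Thunder PCIe host controller driver mentioned above is
selected by a separate option that is not part of this patch):

    CONFIG_NET_VENDOR_CAVIUM=y
    CONFIG_THUNDER_NIC_PF=y     # physical function driver (nicpf)
    CONFIG_THUNDER_NIC_VF=y     # virtual function driver (nicvf)
    CONFIG_THUNDER_NIC_BGX=y    # BGX MAC interface driver (thunder_bgx)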
Signed-off-by: Maciej Czekaj <mjc@semihalf.com>
Signed-off-by: David Daney <david.daney@cavium.com>
Signed-off-by: Sunil Goutham <sgoutham@cavium.com>
Signed-off-by: Ganapatrao Kulkarni <ganapatrao.kulkarni@caviumnetworks.com>
Signed-off-by: Aleksey Makarov <aleksey.makarov@caviumnetworks.com>
Signed-off-by: Tomasz Nowicki <tomasz.nowicki@linaro.org>
Signed-off-by: Robert Richter <rrichter@cavium.com>
Signed-off-by: Kamil Rytarowski <kamil@semihalf.com>
Signed-off-by: Thanneeru Srinivasulu <tsrinivasulu@caviumnetworks.com>
Signed-off-by: Sruthi Vangala <svangala@cavium.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--   drivers/net/ethernet/Kconfig                             1
-rw-r--r--   drivers/net/ethernet/Makefile                            1
-rw-r--r--   drivers/net/ethernet/cavium/Kconfig                     40
-rw-r--r--   drivers/net/ethernet/cavium/Makefile                     5
-rw-r--r--   drivers/net/ethernet/cavium/thunder/Makefile            11
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nic.h              414
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nic_main.c         940
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nic_reg.h          213
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c    601
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nicvf_main.c      1332
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nicvf_queues.c    1544
-rw-r--r--   drivers/net/ethernet/cavium/thunder/nicvf_queues.h     381
-rw-r--r--   drivers/net/ethernet/cavium/thunder/q_struct.h         701
-rw-r--r--   drivers/net/ethernet/cavium/thunder/thunder_bgx.c      966
-rw-r--r--   drivers/net/ethernet/cavium/thunder/thunder_bgx.h      223
15 files changed, 7373 insertions, 0 deletions
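
If the three drivers are built as modules, loading them by hand would look
roughly like the following (module names taken from the thunder/ Makefile added
below; in normal use the PCI device tables let udev load them automatically):

    modprobe thunder_bgx   # MAC (BGX) interface driver, used by the PF
    modprobe nicpf         # physical function driver, enables the SR-IOV VFs
    modprobe nicvf         # virtual function driver, binds to each VF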
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index eadcb053807e..9a8308553520 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -34,6 +34,7 @@ source "drivers/net/ethernet/adi/Kconfig" source "drivers/net/ethernet/broadcom/Kconfig" source "drivers/net/ethernet/brocade/Kconfig" source "drivers/net/ethernet/calxeda/Kconfig" +source "drivers/net/ethernet/cavium/Kconfig" source "drivers/net/ethernet/chelsio/Kconfig" source "drivers/net/ethernet/cirrus/Kconfig" source "drivers/net/ethernet/cisco/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1367afcd0a8b..4395d99115a0 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_NET_BFIN) += adi/ obj-$(CONFIG_NET_VENDOR_BROADCOM) += broadcom/ obj-$(CONFIG_NET_VENDOR_BROCADE) += brocade/ obj-$(CONFIG_NET_CALXEDA_XGMAC) += calxeda/ +obj-$(CONFIG_NET_VENDOR_CAVIUM) += cavium/ obj-$(CONFIG_NET_VENDOR_CHELSIO) += chelsio/ obj-$(CONFIG_NET_VENDOR_CIRRUS) += cirrus/ obj-$(CONFIG_NET_VENDOR_CISCO) += cisco/ diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig new file mode 100644 index 000000000000..6365fb4242be --- /dev/null +++ b/drivers/net/ethernet/cavium/Kconfig @@ -0,0 +1,40 @@ +# +# Cavium ethernet device configuration +# + +config NET_VENDOR_CAVIUM + tristate "Cavium ethernet drivers" + depends on PCI + ---help--- + Enable support for the Cavium ThunderX Network Interface + Controller (NIC). The NIC provides the controller and DMA + engines to move network traffic to/from the memory. The NIC + works closely with TNS, BGX and SerDes to implement the + functions replacing and virtualizing those of a typical + standalone PCIe NIC chip. + + If you have a Cavium Thunder board, say Y. + +if NET_VENDOR_CAVIUM + +config THUNDER_NIC_PF + tristate "Thunder Physical function driver" + default NET_VENDOR_CAVIUM + select THUNDER_NIC_BGX + ---help--- + This driver supports Thunder's NIC physical function. + +config THUNDER_NIC_VF + tristate "Thunder Virtual function driver" + default NET_VENDOR_CAVIUM + ---help--- + This driver supports Thunder's NIC virtual function + +config THUNDER_NIC_BGX + tristate "Thunder MAC interface driver (BGX)" + default NET_VENDOR_CAVIUM + ---help--- + This driver supports programming and controlling of MAC + interface from NIC physical function driver. + +endif # NET_VENDOR_CAVIUM diff --git a/drivers/net/ethernet/cavium/Makefile b/drivers/net/ethernet/cavium/Makefile new file mode 100644 index 000000000000..7aac4780d050 --- /dev/null +++ b/drivers/net/ethernet/cavium/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Cavium ethernet device drivers. 
+# + +obj-$(CONFIG_NET_VENDOR_CAVIUM) += thunder/ diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile new file mode 100644 index 000000000000..5c4615ccaa14 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/Makefile @@ -0,0 +1,11 @@ +# +# Makefile for Cavium's Thunder ethernet device +# + +obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o +obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o +obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o + +nicpf-y := nic_main.o +nicvf-y := nicvf_main.o nicvf_queues.o +nicvf-y += nicvf_ethtool.o diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h new file mode 100644 index 000000000000..9b0be527909b --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -0,0 +1,414 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef NIC_H +#define NIC_H + +#include <linux/netdevice.h> +#include <linux/interrupt.h> +#include "thunder_bgx.h" + +/* PCI device IDs */ +#define PCI_DEVICE_ID_THUNDER_NIC_PF 0xA01E +#define PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF 0x0011 +#define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034 +#define PCI_DEVICE_ID_THUNDER_BGX 0xA026 + +/* PCI BAR nos */ +#define PCI_CFG_REG_BAR_NUM 0 +#define PCI_MSIX_REG_BAR_NUM 4 + +/* NIC SRIOV VF count */ +#define MAX_NUM_VFS_SUPPORTED 128 +#define DEFAULT_NUM_VF_ENABLED 8 + +#define NIC_TNS_BYPASS_MODE 0 +#define NIC_TNS_MODE 1 + +/* NIC priv flags */ +#define NIC_SRIOV_ENABLED BIT(0) + +/* Min/Max packet size */ +#define NIC_HW_MIN_FRS 64 +#define NIC_HW_MAX_FRS 9200 /* 9216 max packet including FCS */ + +/* Max pkinds */ +#define NIC_MAX_PKIND 16 + +/* Rx Channels */ +/* Receive channel configuration in TNS bypass mode + * Below is configuration in TNS bypass mode + * BGX0-LMAC0-CHAN0 - VNIC CHAN0 + * BGX0-LMAC1-CHAN0 - VNIC CHAN16 + * ... + * BGX1-LMAC0-CHAN0 - VNIC CHAN128 + * ... 
+ * BGX1-LMAC3-CHAN0 - VNIC CHAN174 + */ +#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */ +#define NIC_CHANS_PER_INF 128 +#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF) +#define NIC_CPI_COUNT 2048 /* No of channel parse indices */ + +/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */ +#define NIC_MAX_BGX MAX_BGX_PER_CN88XX +#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX) +#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */ +#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX) + +/* Tx scheduling */ +#define NIC_MAX_TL4 1024 +#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */ +#define NIC_MAX_TL3 256 +#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */ +#define NIC_MAX_TL2 64 +#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */ +#define NIC_MAX_TL1 2 + +/* TNS bypass mode */ +#define NIC_TL2_PER_BGX 32 +#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX) +#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF) + +/* NIC VF Interrupts */ +#define NICVF_INTR_CQ 0 +#define NICVF_INTR_SQ 1 +#define NICVF_INTR_RBDR 2 +#define NICVF_INTR_PKT_DROP 3 +#define NICVF_INTR_TCP_TIMER 4 +#define NICVF_INTR_MBOX 5 +#define NICVF_INTR_QS_ERR 6 + +#define NICVF_INTR_CQ_SHIFT 0 +#define NICVF_INTR_SQ_SHIFT 8 +#define NICVF_INTR_RBDR_SHIFT 16 +#define NICVF_INTR_PKT_DROP_SHIFT 20 +#define NICVF_INTR_TCP_TIMER_SHIFT 21 +#define NICVF_INTR_MBOX_SHIFT 22 +#define NICVF_INTR_QS_ERR_SHIFT 23 + +#define NICVF_INTR_CQ_MASK (0xFF << NICVF_INTR_CQ_SHIFT) +#define NICVF_INTR_SQ_MASK (0xFF << NICVF_INTR_SQ_SHIFT) +#define NICVF_INTR_RBDR_MASK (0x03 << NICVF_INTR_RBDR_SHIFT) +#define NICVF_INTR_PKT_DROP_MASK BIT(NICVF_INTR_PKT_DROP_SHIFT) +#define NICVF_INTR_TCP_TIMER_MASK BIT(NICVF_INTR_TCP_TIMER_SHIFT) +#define NICVF_INTR_MBOX_MASK BIT(NICVF_INTR_MBOX_SHIFT) +#define NICVF_INTR_QS_ERR_MASK BIT(NICVF_INTR_QS_ERR_SHIFT) + +/* MSI-X interrupts */ +#define NIC_PF_MSIX_VECTORS 10 +#define NIC_VF_MSIX_VECTORS 20 + +#define NIC_PF_INTR_ID_ECC0_SBE 0 +#define NIC_PF_INTR_ID_ECC0_DBE 1 +#define NIC_PF_INTR_ID_ECC1_SBE 2 +#define NIC_PF_INTR_ID_ECC1_DBE 3 +#define NIC_PF_INTR_ID_ECC2_SBE 4 +#define NIC_PF_INTR_ID_ECC2_DBE 5 +#define NIC_PF_INTR_ID_ECC3_SBE 6 +#define NIC_PF_INTR_ID_ECC3_DBE 7 +#define NIC_PF_INTR_ID_MBOX0 8 +#define NIC_PF_INTR_ID_MBOX1 9 + +/* Global timer for CQ timer thresh interrupts + * Calculated for SCLK of 700Mhz + * value written should be a 1/16th of what is expected + * + * 1 tick per 0.05usec = value of 2.2 + * This 10% would be covered in CQ timer thresh value + */ +#define NICPF_CLK_PER_INT_TICK 2 + +struct nicvf_cq_poll { + u8 cq_idx; /* Completion queue index */ + struct napi_struct napi; +}; + +#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */ +#define NIC_MAX_RSS_HASH_BITS 8 +#define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) +#define RSS_HASH_KEY_SIZE 5 /* 320 bit key */ + +struct nicvf_rss_info { + bool enable; +#define RSS_L2_EXTENDED_HASH_ENA BIT(0) +#define RSS_IP_HASH_ENA BIT(1) +#define RSS_TCP_HASH_ENA BIT(2) +#define RSS_TCP_SYN_DIS BIT(3) +#define RSS_UDP_HASH_ENA BIT(4) +#define RSS_L4_EXTENDED_HASH_ENA BIT(5) +#define RSS_ROCE_ENA BIT(6) +#define RSS_L3_BI_DIRECTION_ENA BIT(7) +#define RSS_L4_BI_DIRECTION_ENA BIT(8) + u64 cfg; + u8 hash_bits; + u16 rss_size; + u8 ind_tbl[NIC_MAX_RSS_IDR_TBL_SIZE]; + u64 key[RSS_HASH_KEY_SIZE]; +} ____cacheline_aligned_in_smp; + +enum rx_stats_reg_offset { + RX_OCTS = 0x0, + RX_UCAST = 0x1, + RX_BCAST = 0x2, + RX_MCAST = 0x3, + RX_RED = 0x4, + 
RX_RED_OCTS = 0x5, + RX_ORUN = 0x6, + RX_ORUN_OCTS = 0x7, + RX_FCS = 0x8, + RX_L2ERR = 0x9, + RX_DRP_BCAST = 0xa, + RX_DRP_MCAST = 0xb, + RX_DRP_L3BCAST = 0xc, + RX_DRP_L3MCAST = 0xd, + RX_STATS_ENUM_LAST, +}; + +enum tx_stats_reg_offset { + TX_OCTS = 0x0, + TX_UCAST = 0x1, + TX_BCAST = 0x2, + TX_MCAST = 0x3, + TX_DROP = 0x4, + TX_STATS_ENUM_LAST, +}; + +struct nicvf_hw_stats { + u64 rx_bytes_ok; + u64 rx_ucast_frames_ok; + u64 rx_bcast_frames_ok; + u64 rx_mcast_frames_ok; + u64 rx_fcs_errors; + u64 rx_l2_errors; + u64 rx_drop_red; + u64 rx_drop_red_bytes; + u64 rx_drop_overrun; + u64 rx_drop_overrun_bytes; + u64 rx_drop_bcast; + u64 rx_drop_mcast; + u64 rx_drop_l3_bcast; + u64 rx_drop_l3_mcast; + u64 tx_bytes_ok; + u64 tx_ucast_frames_ok; + u64 tx_bcast_frames_ok; + u64 tx_mcast_frames_ok; + u64 tx_drops; +}; + +struct nicvf_drv_stats { + /* Rx */ + u64 rx_frames_ok; + u64 rx_frames_64; + u64 rx_frames_127; + u64 rx_frames_255; + u64 rx_frames_511; + u64 rx_frames_1023; + u64 rx_frames_1518; + u64 rx_frames_jumbo; + u64 rx_drops; + /* Tx */ + u64 tx_frames_ok; + u64 tx_drops; + u64 tx_busy; + u64 tx_tso; +}; + +struct nicvf { + struct net_device *netdev; + struct pci_dev *pdev; + u8 vf_id; + u8 node; + u8 tns_mode; + u16 mtu; + struct queue_set *qs; + void __iomem *reg_base; + bool link_up; + u8 duplex; + u32 speed; + struct page *rb_page; + u32 rb_page_offset; + bool rb_alloc_fail; + bool rb_work_scheduled; + struct delayed_work rbdr_work; + struct tasklet_struct rbdr_task; + struct tasklet_struct qs_err_task; + struct tasklet_struct cq_task; + struct nicvf_cq_poll *napi[8]; + struct nicvf_rss_info rss_info; + u8 cpi_alg; + /* Interrupt coalescing settings */ + u32 cq_coalesce_usecs; + + u32 msg_enable; + struct nicvf_hw_stats stats; + struct nicvf_drv_stats drv_stats; + struct bgx_stats bgx_stats; + struct work_struct reset_task; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; + char irq_name[NIC_VF_MSIX_VECTORS][20]; + bool irq_allocated[NIC_VF_MSIX_VECTORS]; + + bool pf_ready_to_rcv_msg; + bool pf_acked; + bool pf_nacked; + bool bgx_stats_acked; +} ____cacheline_aligned_in_smp; + +/* PF <--> VF Mailbox communication + * Eight 64bit registers are shared between PF and VF. + * Separate set for each VF. + * Writing '1' into last register mbx7 means end of message. 
+ */ + +/* PF <--> VF mailbox communication */ +#define NIC_PF_VF_MAILBOX_SIZE 2 +#define NIC_MBOX_MSG_TIMEOUT 2000 /* ms */ + +/* Mailbox message types */ +#define NIC_MBOX_MSG_READY 0x01 /* Is PF ready to rcv msgs */ +#define NIC_MBOX_MSG_ACK 0x02 /* ACK the message received */ +#define NIC_MBOX_MSG_NACK 0x03 /* NACK the message received */ +#define NIC_MBOX_MSG_QS_CFG 0x04 /* Configure Qset */ +#define NIC_MBOX_MSG_RQ_CFG 0x05 /* Configure receive queue */ +#define NIC_MBOX_MSG_SQ_CFG 0x06 /* Configure Send queue */ +#define NIC_MBOX_MSG_RQ_DROP_CFG 0x07 /* Configure receive queue */ +#define NIC_MBOX_MSG_SET_MAC 0x08 /* Add MAC ID to DMAC filter */ +#define NIC_MBOX_MSG_SET_MAX_FRS 0x09 /* Set max frame size */ +#define NIC_MBOX_MSG_CPI_CFG 0x0A /* Config CPI, RSSI */ +#define NIC_MBOX_MSG_RSS_SIZE 0x0B /* Get RSS indir_tbl size */ +#define NIC_MBOX_MSG_RSS_CFG 0x0C /* Config RSS table */ +#define NIC_MBOX_MSG_RSS_CFG_CONT 0x0D /* RSS config continuation */ +#define NIC_MBOX_MSG_RQ_BP_CFG 0x0E /* RQ backpressure config */ +#define NIC_MBOX_MSG_RQ_SW_SYNC 0x0F /* Flush inflight pkts to RQ */ +#define NIC_MBOX_MSG_BGX_STATS 0x10 /* Get stats from BGX */ +#define NIC_MBOX_MSG_BGX_LINK_CHANGE 0x11 /* BGX:LMAC link status */ +#define NIC_MBOX_MSG_CFG_DONE 0x12 /* VF configuration done */ +#define NIC_MBOX_MSG_SHUTDOWN 0x13 /* VF is being shutdown */ + +struct nic_cfg_msg { + u8 msg; + u8 vf_id; + u8 tns_mode; + u8 node_id; + u64 mac_addr; +}; + +/* Qset configuration */ +struct qs_cfg_msg { + u8 msg; + u8 num; + u64 cfg; +}; + +/* Receive queue configuration */ +struct rq_cfg_msg { + u8 msg; + u8 qs_num; + u8 rq_num; + u64 cfg; +}; + +/* Send queue configuration */ +struct sq_cfg_msg { + u8 msg; + u8 qs_num; + u8 sq_num; + u64 cfg; +}; + +/* Set VF's MAC address */ +struct set_mac_msg { + u8 msg; + u8 vf_id; + u64 addr; +}; + +/* Set Maximum frame size */ +struct set_frs_msg { + u8 msg; + u8 vf_id; + u16 max_frs; +}; + +/* Set CPI algorithm type */ +struct cpi_cfg_msg { + u8 msg; + u8 vf_id; + u8 rq_cnt; + u8 cpi_alg; +}; + +/* Get RSS table size */ +struct rss_sz_msg { + u8 msg; + u8 vf_id; + u16 ind_tbl_size; +}; + +/* Set RSS configuration */ +struct rss_cfg_msg { + u8 msg; + u8 vf_id; + u8 hash_bits; + u8 tbl_len; + u8 tbl_offset; +#define RSS_IND_TBL_LEN_PER_MBX_MSG 8 + u8 ind_tbl[RSS_IND_TBL_LEN_PER_MBX_MSG]; +}; + +struct bgx_stats_msg { + u8 msg; + u8 vf_id; + u8 rx; + u8 idx; + u64 stats; +}; + +/* Physical interface link status */ +struct bgx_link_status { + u8 msg; + u8 link_up; + u8 duplex; + u32 speed; +}; + +/* 128 bit shared memory between PF and each VF */ +union nic_mbx { + struct { u8 msg; } msg; + struct nic_cfg_msg nic_cfg; + struct qs_cfg_msg qs; + struct rq_cfg_msg rq; + struct sq_cfg_msg sq; + struct set_mac_msg mac; + struct set_frs_msg frs; + struct cpi_cfg_msg cpi_cfg; + struct rss_sz_msg rss_size; + struct rss_cfg_msg rss_cfg; + struct bgx_stats_msg bgx_stats; + struct bgx_link_status link_status; +}; + +int nicvf_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues); +int nicvf_open(struct net_device *netdev); +int nicvf_stop(struct net_device *netdev); +int nicvf_send_msg_to_pf(struct nicvf *vf, union nic_mbx *mbx); +void nicvf_config_cpi(struct nicvf *nic); +void nicvf_config_rss(struct nicvf *nic); +void nicvf_set_rss_key(struct nicvf *nic); +void nicvf_free_skb(struct nicvf *nic, struct sk_buff *skb); +void nicvf_set_ethtool_ops(struct net_device *netdev); +void nicvf_update_stats(struct nicvf *nic); +void 
nicvf_update_lmac_stats(struct nicvf *nic); + +#endif /* NIC_H */ diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c new file mode 100644 index 000000000000..0f1f58b54bf1 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -0,0 +1,940 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/etherdevice.h> +#include <linux/of.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nic" +#define DRV_VERSION "1.0" + +struct nicpf { + struct pci_dev *pdev; + u8 rev_id; +#define NIC_NODE_ID_MASK 0x300000000000 +#define NIC_NODE_ID(x) ((x & NODE_ID_MASK) >> 44) + u8 node; + unsigned int flags; + u8 num_vf_en; /* No of VF enabled */ + bool vf_enabled[MAX_NUM_VFS_SUPPORTED]; + void __iomem *reg_base; /* Register start address */ + struct pkind_cfg pkind; +#define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF)) +#define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) +#define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) + u8 vf_lmac_map[MAX_LMAC]; + struct delayed_work dwork; + struct workqueue_struct *check_link; + u8 link[MAX_LMAC]; + u8 duplex[MAX_LMAC]; + u32 speed[MAX_LMAC]; + u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; + u16 rss_ind_tbl_size; + bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; + + /* MSI-X */ + bool msix_enabled; + u8 num_vec; + struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS]; + bool irq_allocated[NIC_PF_MSIX_VECTORS]; +}; + +/* Supported devices */ +static const struct pci_device_id nic_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_PF) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Sunil Goutham"); +MODULE_DESCRIPTION("Cavium Thunder NIC Physical Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, nic_id_table); + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. 
+ */ + +/* Register read/write APIs */ +static void nic_reg_write(struct nicpf *nic, u64 offset, u64 val) +{ + writeq_relaxed(val, nic->reg_base + offset); +} + +static u64 nic_reg_read(struct nicpf *nic, u64 offset) +{ + return readq_relaxed(nic->reg_base + offset); +} + +/* PF -> VF mailbox communication APIs */ +static void nic_enable_mbx_intr(struct nicpf *nic) +{ + /* Enable mailbox interrupt for all 128 VFs */ + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull); + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull); +} + +static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg) +{ + nic_reg_write(nic, NIC_PF_MAILBOX_INT + (mbx_reg << 3), BIT_ULL(vf)); +} + +static u64 nic_get_mbx_addr(int vf) +{ + return NIC_PF_VF_0_127_MAILBOX_0_1 + (vf << NIC_VF_NUM_SHIFT); +} + +/* Send a mailbox message to VF + * @vf: vf to which this message to be sent + * @mbx: Message to be sent + */ +static void nic_send_msg_to_vf(struct nicpf *nic, int vf, union nic_mbx *mbx) +{ + void __iomem *mbx_addr = nic->reg_base + nic_get_mbx_addr(vf); + u64 *msg = (u64 *)mbx; + + /* In first revision HW, mbox interrupt is triggerred + * when PF writes to MBOX(1), in next revisions when + * PF writes to MBOX(0) + */ + if (nic->rev_id == 0) { + /* see the comment for nic_reg_write()/nic_reg_read() + * functions above + */ + writeq_relaxed(msg[0], mbx_addr); + writeq_relaxed(msg[1], mbx_addr + 8); + } else { + writeq_relaxed(msg[1], mbx_addr + 8); + writeq_relaxed(msg[0], mbx_addr); + } +} + +/* Responds to VF's READY message with VF's + * ID, node, MAC address e.t.c + * @vf: VF which sent READY message + */ +static void nic_mbx_send_ready(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + int bgx_idx, lmac; + const char *mac; + + mbx.nic_cfg.msg = NIC_MBOX_MSG_READY; + mbx.nic_cfg.vf_id = vf; + + mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE; + + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + + mac = bgx_get_lmac_mac(nic->node, bgx_idx, lmac); + if (mac) + ether_addr_copy((u8 *)&mbx.nic_cfg.mac_addr, mac); + + mbx.nic_cfg.node_id = nic->node; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* ACKs VF's mailbox message + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_ack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_ACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* NACKs VF's mailbox message that PF is not able to + * complete the action + * @vf: VF to which ACK to be sent + */ +static void nic_mbx_send_nack(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_NACK; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* Flush all in flight receive packets to memory and + * bring down an active RQ + */ +static int nic_rcv_queue_sw_sync(struct nicpf *nic) +{ + u16 timeout = ~0x00; + + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x01); + /* Wait till sync cycle is finished */ + while (timeout) { + if (nic_reg_read(nic, NIC_PF_SW_SYNC_RX_DONE) & 0x1) + break; + timeout--; + } + nic_reg_write(nic, NIC_PF_SW_SYNC_RX, 0x00); + if (!timeout) { + dev_err(&nic->pdev->dev, "Receive queue software sync failed"); + return 1; + } + return 0; +} + +/* Get BGX Rx/Tx stats and respond to VF's request */ +static void nic_get_bgx_stats(struct nicpf *nic, struct bgx_stats_msg *bgx) +{ + int bgx_idx, lmac; + union nic_mbx mbx = {}; + + bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + lmac = 
NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[bgx->vf_id]); + + mbx.bgx_stats.msg = NIC_MBOX_MSG_BGX_STATS; + mbx.bgx_stats.vf_id = bgx->vf_id; + mbx.bgx_stats.rx = bgx->rx; + mbx.bgx_stats.idx = bgx->idx; + if (bgx->rx) + mbx.bgx_stats.stats = bgx_get_rx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + else + mbx.bgx_stats.stats = bgx_get_tx_stats(nic->node, bgx_idx, + lmac, bgx->idx); + nic_send_msg_to_vf(nic, bgx->vf_id, &mbx); +} + +/* Update hardware min/max frame size */ +static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) +{ + if ((new_frs > NIC_HW_MAX_FRS) || (new_frs < NIC_HW_MIN_FRS)) { + dev_err(&nic->pdev->dev, + "Invalid MTU setting from VF%d rejected, should be between %d and %d\n", + vf, NIC_HW_MIN_FRS, NIC_HW_MAX_FRS); + return 1; + } + new_frs += ETH_HLEN; + if (new_frs <= nic->pkind.maxlen) + return 0; + + nic->pkind.maxlen = new_frs; + nic_reg_write(nic, NIC_PF_PKIND_0_15_CFG, *(u64 *)&nic->pkind); + return 0; +} + +/* Set minimum transmit packet size */ +static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) +{ + int lmac; + u64 lmac_cfg; + + /* Max value that can be set is 60 */ + if (size > 60) + size = 60; + + for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { + lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); + lmac_cfg &= ~(0xF << 2); + lmac_cfg |= ((size / 4) << 2); + nic_reg_write(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3), lmac_cfg); + } +} + +/* Function to check number of LMACs present and set VF::LMAC mapping. + * Mapping will be used while initializing channels. + */ +static void nic_set_lmac_vf_mapping(struct nicpf *nic) +{ + unsigned bgx_map = bgx_get_map(nic->node); + int bgx, next_bgx_lmac = 0; + int lmac, lmac_cnt = 0; + u64 lmac_credit; + + nic->num_vf_en = 0; + + for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { + if (!(bgx_map & (1 << bgx))) + continue; + lmac_cnt = bgx_get_lmac_count(nic->node, bgx); + for (lmac = 0; lmac < lmac_cnt; lmac++) + nic->vf_lmac_map[next_bgx_lmac++] = + NIC_SET_VF_LMAC_MAP(bgx, lmac); + nic->num_vf_en += lmac_cnt; + + /* Program LMAC credits */ + lmac_credit = (1ull << 1); /* channel credit enable */ + lmac_credit |= (0x1ff << 2); /* Max outstanding pkt count */ + /* 48KB BGX Tx buffer size, each unit is of size 16bytes */ + lmac_credit |= (((((48 * 1024) / lmac_cnt) - + NIC_HW_MAX_FRS) / 16) << 12); + lmac = bgx * MAX_LMAC_PER_BGX; + for (; lmac < lmac_cnt + (bgx * MAX_LMAC_PER_BGX); lmac++) + nic_reg_write(nic, + NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), + lmac_credit); + } +} + +#define BGX0_BLOCK 8 +#define BGX1_BLOCK 9 + +static void nic_init_hw(struct nicpf *nic) +{ + int i; + + /* Reset NIC, in case the driver is repeatedly inserted and removed */ + nic_reg_write(nic, NIC_PF_SOFT_RESET, 1); + + /* Enable NIC HW block */ + nic_reg_write(nic, NIC_PF_CFG, 0x3); + + /* Enable backpressure */ + nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03); + + /* Disable TNS mode on both interfaces */ + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, + (1ULL << 63) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), + (1ULL << 63) | BGX1_BLOCK); + + /* PKIND configuration */ + nic->pkind.minlen = 0; + nic->pkind.maxlen = NIC_HW_MAX_FRS + ETH_HLEN; + nic->pkind.lenerr_en = 1; + nic->pkind.rx_hdr = 0; + nic->pkind.hdr_sl = 0; + + for (i = 0; i < NIC_MAX_PKIND; i++) + nic_reg_write(nic, 
NIC_PF_PKIND_0_15_CFG | (i << 3), + *(u64 *)&nic->pkind); + + nic_set_tx_pkt_pad(nic, NIC_HW_MIN_FRS); + + /* Timer config */ + nic_reg_write(nic, NIC_PF_INTR_TIMER_CFG, NICPF_CLK_PER_INT_TICK); +} + +/* Channel parse index configuration */ +static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) +{ + u32 vnic, bgx, lmac, chan; + u32 padd, cpi_count = 0; + u64 cpi_base, cpi, rssi_base, rssi; + u8 qset, rq_idx = 0; + + vnic = cfg->vf_id; + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + + chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX); + rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX); + + /* Rx channel configuration */ + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), + (1ull << 63) | (vnic << 0)); + nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_CFG | (chan << 3), + ((u64)cfg->cpi_alg << 62) | (cpi_base << 48)); + + if (cfg->cpi_alg == CPI_ALG_NONE) + cpi_count = 1; + else if (cfg->cpi_alg == CPI_ALG_VLAN) /* 3 bits of PCP */ + cpi_count = 8; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) /* 3 bits PCP + DEI */ + cpi_count = 16; + else if (cfg->cpi_alg == CPI_ALG_DIFF) /* 6bits DSCP */ + cpi_count = NIC_MAX_CPI_PER_LMAC; + + /* RSS Qset, Qidx mapping */ + qset = cfg->vf_id; + rssi = rssi_base; + for (; rssi < (rssi_base + cfg->rq_cnt); rssi++) { + nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3), + (qset << 3) | rq_idx); + rq_idx++; + } + + rssi = 0; + cpi = cpi_base; + for (; cpi < (cpi_base + cpi_count); cpi++) { + /* Determine port to channel adder */ + if (cfg->cpi_alg != CPI_ALG_DIFF) + padd = cpi % cpi_count; + else + padd = cpi % 8; /* 3 bits CS out of 6bits DSCP */ + + /* Leave RSS_SIZE as '0' to disable RSS */ + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi << 3), + (vnic << 24) | (padd << 16) | (rssi_base + rssi)); + + if ((rssi + 1) >= cfg->rq_cnt) + continue; + + if (cfg->cpi_alg == CPI_ALG_VLAN) + rssi++; + else if (cfg->cpi_alg == CPI_ALG_VLAN16) + rssi = ((cpi - cpi_base) & 0xe) >> 1; + else if (cfg->cpi_alg == CPI_ALG_DIFF) + rssi = ((cpi - cpi_base) & 0x38) >> 3; + } + nic->cpi_base[cfg->vf_id] = cpi_base; +} + +/* Responsds to VF with its RSS indirection table size */ +static void nic_send_rss_size(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + u64 *msg; + + msg = (u64 *)&mbx; + + mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; + mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size; + nic_send_msg_to_vf(nic, vf, &mbx); +} + +/* Receive side scaling configuration + * configure: + * - RSS index + * - indir table i.e hash::RQ mapping + * - no of hash bits to consider + */ +static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) +{ + u8 qset, idx = 0; + u64 cpi_cfg, cpi_base, rssi_base, rssi; + + cpi_base = nic->cpi_base[cfg->vf_id]; + cpi_cfg = nic_reg_read(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3)); + rssi_base = (cpi_cfg & 0x0FFF) + cfg->tbl_offset; + + rssi = rssi_base; + qset = cfg->vf_id; + + for (; rssi < (rssi_base + cfg->tbl_len); rssi++) { + nic_reg_write(nic, NIC_PF_RSSI_0_4097_RQ | (rssi << 3), + (qset << 3) | (cfg->ind_tbl[idx] & 0x7)); + idx++; + } + + cpi_cfg &= ~(0xFULL << 20); + cpi_cfg |= (cfg->hash_bits << 20); + nic_reg_write(nic, NIC_PF_CPI_0_2047_CFG | (cpi_base << 3), cpi_cfg); +} + +/* 4 level transmit side scheduler configutation + * for TNS bypass mode + * + * Sample configuration for SQ0 + * VNIC0-SQ0 -> TL4(0) -> 
TL3[0] -> TL2[0] -> TL1[0] -> BGX0 + * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0 + * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0 + * VNIC3-SQ0 -> TL4(24) -> TL3[6] -> TL2[1] -> TL1[0] -> BGX0 + * VNIC4-SQ0 -> TL4(512) -> TL3[128] -> TL2[32] -> TL1[1] -> BGX1 + * VNIC5-SQ0 -> TL4(520) -> TL3[130] -> TL2[32] -> TL1[1] -> BGX1 + * VNIC6-SQ0 -> TL4(528) -> TL3[132] -> TL2[33] -> TL1[1] -> BGX1 + * VNIC7-SQ0 -> TL4(536) -> TL3[134] -> TL2[33] -> TL1[1] -> BGX1 + */ +static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u8 sq_idx) +{ + u32 bgx, lmac, chan; + u32 tl2, tl3, tl4; + u32 rr_quantum; + + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); + /* 24 bytes for FCS, IPG and preamble */ + rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); + + tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + tl4 += sq_idx; + tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); + nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | + ((u64)vnic << NIC_QS_ID_SHIFT) | + ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4); + nic_reg_write(nic, NIC_PF_TL4_0_1023_CFG | (tl4 << 3), + ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum); + + nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum); + chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + /* Enable backpressure on the channel */ + nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1); + + tl2 = tl3 >> 2; + nic_reg_write(nic, NIC_PF_TL3A_0_63_CFG | (tl2 << 3), tl2); + nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum); + /* No priorities as of now */ + nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00); +} + +/* Interrupt handler to handle mailbox messages from VFs */ +static void nic_handle_mbx_intr(struct nicpf *nic, int vf) +{ + union nic_mbx mbx = {}; + u64 *mbx_data; + u64 mbx_addr; + u64 reg_addr; + u64 mac_addr; + int bgx, lmac; + int i; + int ret = 0; + + nic->mbx_lock[vf] = true; + + mbx_addr = nic_get_mbx_addr(vf); + mbx_data = (u64 *)&mbx; + + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + *mbx_data = nic_reg_read(nic, mbx_addr); + mbx_data++; + mbx_addr += sizeof(u64); + } + + dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n", + __func__, mbx.msg.msg, vf); + switch (mbx.msg.msg) { + case NIC_MBOX_MSG_READY: + nic_mbx_send_ready(nic, vf); + nic->link[vf] = 0; + nic->duplex[vf] = 0; + nic->speed[vf] = 0; + ret = 1; + break; + case NIC_MBOX_MSG_QS_CFG: + reg_addr = NIC_PF_QSET_0_127_CFG | + (mbx.qs.num << NIC_QS_ID_SHIFT); + nic_reg_write(nic, reg_addr, mbx.qs.cfg); + break; + case NIC_MBOX_MSG_RQ_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_RQ_BP_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_RQ_SW_SYNC: + ret = nic_rcv_queue_sw_sync(nic); + break; + case NIC_MBOX_MSG_RQ_DROP_CFG: + reg_addr = NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG | + (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, reg_addr, mbx.rq.cfg); + break; + case NIC_MBOX_MSG_SQ_CFG: + reg_addr = NIC_PF_QSET_0_127_SQ_0_7_CFG | + (mbx.sq.qs_num << NIC_QS_ID_SHIFT) | + (mbx.sq.sq_num << NIC_Q_NUM_SHIFT); + nic_reg_write(nic, 
reg_addr, mbx.sq.cfg); + nic_tx_channel_cfg(nic, mbx.qs.num, mbx.sq.sq_num); + break; + case NIC_MBOX_MSG_SET_MAC: + lmac = mbx.mac.vf_id; + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); +#ifdef __BIG_ENDIAN + mac_addr = cpu_to_be64(mbx.nic_cfg.mac_addr) << 16; +#else + mac_addr = cpu_to_be64(mbx.nic_cfg.mac_addr) >> 16; +#endif + bgx_set_lmac_mac(nic->node, bgx, lmac, (u8 *)&mac_addr); + break; + case NIC_MBOX_MSG_SET_MAX_FRS: + ret = nic_update_hw_frs(nic, mbx.frs.max_frs, + mbx.frs.vf_id); + break; + case NIC_MBOX_MSG_CPI_CFG: + nic_config_cpi(nic, &mbx.cpi_cfg); + break; + case NIC_MBOX_MSG_RSS_SIZE: + nic_send_rss_size(nic, vf); + goto unlock; + case NIC_MBOX_MSG_RSS_CFG: + case NIC_MBOX_MSG_RSS_CFG_CONT: + nic_config_rss(nic, &mbx.rss_cfg); + break; + case NIC_MBOX_MSG_CFG_DONE: + /* Last message of VF config msg sequence */ + nic->vf_enabled[vf] = true; + goto unlock; + case NIC_MBOX_MSG_SHUTDOWN: + /* First msg in VF teardown sequence */ + nic->vf_enabled[vf] = false; + break; + case NIC_MBOX_MSG_BGX_STATS: + nic_get_bgx_stats(nic, &mbx.bgx_stats); + goto unlock; + default: + dev_err(&nic->pdev->dev, + "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); + break; + } + + if (!ret) + nic_mbx_send_ack(nic, vf); + else if (mbx.msg.msg != NIC_MBOX_MSG_READY) + nic_mbx_send_nack(nic, vf); +unlock: + nic->mbx_lock[vf] = false; +} + +static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) +{ + u64 intr; + u8 vf, vf_per_mbx_reg = 64; + + intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3)); + dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); + for (vf = 0; vf < vf_per_mbx_reg; vf++) { + if (intr & (1ULL << vf)) { + dev_dbg(&nic->pdev->dev, "Intr from VF %d\n", + vf + (mbx * vf_per_mbx_reg)); + if ((vf + (mbx * vf_per_mbx_reg)) > nic->num_vf_en) + break; + nic_handle_mbx_intr(nic, vf + (mbx * vf_per_mbx_reg)); + nic_clear_mbx_intr(nic, vf, mbx); + } + } +} + +static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq) +{ + struct nicpf *nic = (struct nicpf *)nic_irq; + + nic_mbx_intr_handler(nic, 0); + + return IRQ_HANDLED; +} + +static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq) +{ + struct nicpf *nic = (struct nicpf *)nic_irq; + + nic_mbx_intr_handler(nic, 1); + + return IRQ_HANDLED; +} + +static int nic_enable_msix(struct nicpf *nic) +{ + int i, ret; + + nic->num_vec = NIC_PF_MSIX_VECTORS; + + for (i = 0; i < nic->num_vec; i++) + nic->msix_entries[i].entry = i; + + ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); + if (ret) { + dev_err(&nic->pdev->dev, + "Request for #%d msix vectors failed\n", + nic->num_vec); + return ret; + } + + nic->msix_enabled = 1; + return 0; +} + +static void nic_disable_msix(struct nicpf *nic) +{ + if (nic->msix_enabled) { + pci_disable_msix(nic->pdev); + nic->msix_enabled = 0; + nic->num_vec = 0; + } +} + +static void nic_free_all_interrupts(struct nicpf *nic) +{ + int irq; + + for (irq = 0; irq < nic->num_vec; irq++) { + if (nic->irq_allocated[irq]) + free_irq(nic->msix_entries[irq].vector, nic); + nic->irq_allocated[irq] = false; + } +} + +static int nic_register_interrupts(struct nicpf *nic) +{ + int ret; + + /* Enable MSI-X */ + ret = nic_enable_msix(nic); + if (ret) + return ret; + + /* Register mailbox interrupt handlers */ + ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector, + nic_mbx0_intr_handler, 0, "NIC Mbox0", nic); + if (ret) + goto fail; + + nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = 
true; + + ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector, + nic_mbx1_intr_handler, 0, "NIC Mbox1", nic); + if (ret) + goto fail; + + nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true; + + /* Enable mailbox interrupt */ + nic_enable_mbx_intr(nic); + return 0; + +fail: + dev_err(&nic->pdev->dev, "Request irq failed\n"); + nic_free_all_interrupts(nic); + return ret; +} + +static void nic_unregister_interrupts(struct nicpf *nic) +{ + nic_free_all_interrupts(nic); + nic_disable_msix(nic); +} + +static int nic_sriov_init(struct pci_dev *pdev, struct nicpf *nic) +{ + int pos = 0; + int err; + u16 total_vf_cnt; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) { + dev_err(&pdev->dev, "SRIOV capability is not found in PCIe config space\n"); + return -ENODEV; + } + + pci_read_config_word(pdev, (pos + PCI_SRIOV_TOTAL_VF), &total_vf_cnt); + if (total_vf_cnt < nic->num_vf_en) + nic->num_vf_en = total_vf_cnt; + + if (!total_vf_cnt) + return 0; + + err = pci_enable_sriov(pdev, nic->num_vf_en); + if (err) { + dev_err(&pdev->dev, "SRIOV enable failed, num VF is %d\n", + nic->num_vf_en); + nic->num_vf_en = 0; + return err; + } + + dev_info(&pdev->dev, "SRIOV enabled, number of VF available %d\n", + nic->num_vf_en); + + nic->flags |= NIC_SRIOV_ENABLED; + return 0; +} + +/* Poll for BGX LMAC link status and update corresponding VF + * if there is a change, valid only if internal L2 switch + * is not present otherwise VF link is always treated as up + */ +static void nic_poll_for_link(struct work_struct *work) +{ + union nic_mbx mbx = {}; + struct nicpf *nic; + struct bgx_link_status link; + u8 vf, bgx, lmac; + + nic = container_of(work, struct nicpf, dwork.work); + + mbx.link_status.msg = NIC_MBOX_MSG_BGX_LINK_CHANGE; + + for (vf = 0; vf < nic->num_vf_en; vf++) { + /* Poll only if VF is UP */ + if (!nic->vf_enabled[vf]) + continue; + + /* Get BGX, LMAC indices for the VF */ + bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); + /* Get interface link status */ + bgx_get_lmac_link_state(nic->node, bgx, lmac, &link); + + /* Inform VF only if link status changed */ + if (nic->link[vf] == link.link_up) + continue; + + if (!nic->mbx_lock[vf]) { + nic->link[vf] = link.link_up; + nic->duplex[vf] = link.duplex; + nic->speed[vf] = link.speed; + + /* Send a mbox message to VF with current link status */ + mbx.link_status.link_up = link.link_up; + mbx.link_status.duplex = link.duplex; + mbx.link_status.speed = link.speed; + nic_send_msg_to_vf(nic, vf, &mbx); + } + } + queue_delayed_work(nic->check_link, &nic->dwork, HZ * 2); +} + +static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct nicpf *nic; + int err; + + BUILD_BUG_ON(sizeof(union nic_mbx) > 16); + + nic = devm_kzalloc(dev, sizeof(*nic), GFP_KERNEL); + if (!nic) + return -ENOMEM; + + pci_set_drvdata(pdev, nic); + + nic->pdev = pdev; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get 48-bit DMA 
for consistent allocations\n"); + goto err_release_regions; + } + + /* MAP PF's configuration registers */ + nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!nic->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + pci_read_config_byte(pdev, PCI_REVISION_ID, &nic->rev_id); + + nic->node = NIC_NODE_ID(pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM)); + + nic_set_lmac_vf_mapping(nic); + + /* Initialize hardware */ + nic_init_hw(nic); + + /* Set RSS TBL size for each VF */ + nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + + /* Register interrupts */ + err = nic_register_interrupts(nic); + if (err) + goto err_release_regions; + + /* Configure SRIOV */ + err = nic_sriov_init(pdev, nic); + if (err) + goto err_unregister_interrupts; + + /* Register a physical link status poll fn() */ + nic->check_link = alloc_workqueue("check_link_status", + WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (!nic->check_link) { + err = -ENOMEM; + goto err_disable_sriov; + } + + INIT_DELAYED_WORK(&nic->dwork, nic_poll_for_link); + queue_delayed_work(nic->check_link, &nic->dwork, 0); + + return 0; + +err_disable_sriov: + if (nic->flags & NIC_SRIOV_ENABLED) + pci_disable_sriov(pdev); +err_unregister_interrupts: + nic_unregister_interrupts(nic); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void nic_remove(struct pci_dev *pdev) +{ + struct nicpf *nic = pci_get_drvdata(pdev); + + if (nic->flags & NIC_SRIOV_ENABLED) + pci_disable_sriov(pdev); + + if (nic->check_link) { + /* Destroy work Queue */ + cancel_delayed_work(&nic->dwork); + flush_workqueue(nic->check_link); + destroy_workqueue(nic->check_link); + } + + nic_unregister_interrupts(nic); + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver nic_driver = { + .name = DRV_NAME, + .id_table = nic_id_table, + .probe = nic_probe, + .remove = nic_remove, +}; + +static int __init nic_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&nic_driver); +} + +static void __exit nic_cleanup_module(void) +{ + pci_unregister_driver(&nic_driver); +} + +module_init(nic_init_module); +module_exit(nic_cleanup_module); diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h new file mode 100644 index 000000000000..58197bb2f805 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -0,0 +1,213 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#ifndef NIC_REG_H +#define NIC_REG_H + +#define NIC_PF_REG_COUNT 29573 +#define NIC_VF_REG_COUNT 249 + +/* Physical function register offsets */ +#define NIC_PF_CFG (0x0000) +#define NIC_PF_STATUS (0x0010) +#define NIC_PF_INTR_TIMER_CFG (0x0030) +#define NIC_PF_BIST_STATUS (0x0040) +#define NIC_PF_SOFT_RESET (0x0050) +#define NIC_PF_TCP_TIMER (0x0060) +#define NIC_PF_BP_CFG (0x0080) +#define NIC_PF_RRM_CFG (0x0088) +#define NIC_PF_CQM_CF (0x00A0) +#define NIC_PF_CNM_CF (0x00A8) +#define NIC_PF_CNM_STATUS (0x00B0) +#define NIC_PF_CQ_AVG_CFG (0x00C0) +#define NIC_PF_RRM_AVG_CFG (0x00C8) +#define NIC_PF_INTF_0_1_SEND_CFG (0x0200) +#define NIC_PF_INTF_0_1_BP_CFG (0x0208) +#define NIC_PF_INTF_0_1_BP_DIS_0_1 (0x0210) +#define NIC_PF_INTF_0_1_BP_SW_0_1 (0x0220) +#define NIC_PF_RBDR_BP_STATE_0_3 (0x0240) +#define NIC_PF_MAILBOX_INT (0x0410) +#define NIC_PF_MAILBOX_INT_W1S (0x0430) +#define NIC_PF_MAILBOX_ENA_W1C (0x0450) +#define NIC_PF_MAILBOX_ENA_W1S (0x0470) +#define NIC_PF_RX_ETYPE_0_7 (0x0500) +#define NIC_PF_PKIND_0_15_CFG (0x0600) +#define NIC_PF_ECC0_FLIP0 (0x1000) +#define NIC_PF_ECC1_FLIP0 (0x1008) +#define NIC_PF_ECC2_FLIP0 (0x1010) +#define NIC_PF_ECC3_FLIP0 (0x1018) +#define NIC_PF_ECC0_FLIP1 (0x1080) +#define NIC_PF_ECC1_FLIP1 (0x1088) +#define NIC_PF_ECC2_FLIP1 (0x1090) +#define NIC_PF_ECC3_FLIP1 (0x1098) +#define NIC_PF_ECC0_CDIS (0x1100) +#define NIC_PF_ECC1_CDIS (0x1108) +#define NIC_PF_ECC2_CDIS (0x1110) +#define NIC_PF_ECC3_CDIS (0x1118) +#define NIC_PF_BIST0_STATUS (0x1280) +#define NIC_PF_BIST1_STATUS (0x1288) +#define NIC_PF_BIST2_STATUS (0x1290) +#define NIC_PF_BIST3_STATUS (0x1298) +#define NIC_PF_ECC0_SBE_INT (0x2000) +#define NIC_PF_ECC0_SBE_INT_W1S (0x2008) +#define NIC_PF_ECC0_SBE_ENA_W1C (0x2010) +#define NIC_PF_ECC0_SBE_ENA_W1S (0x2018) +#define NIC_PF_ECC0_DBE_INT (0x2100) +#define NIC_PF_ECC0_DBE_INT_W1S (0x2108) +#define NIC_PF_ECC0_DBE_ENA_W1C (0x2110) +#define NIC_PF_ECC0_DBE_ENA_W1S (0x2118) +#define NIC_PF_ECC1_SBE_INT (0x2200) +#define NIC_PF_ECC1_SBE_INT_W1S (0x2208) +#define NIC_PF_ECC1_SBE_ENA_W1C (0x2210) +#define NIC_PF_ECC1_SBE_ENA_W1S (0x2218) +#define NIC_PF_ECC1_DBE_INT (0x2300) +#define NIC_PF_ECC1_DBE_INT_W1S (0x2308) +#define NIC_PF_ECC1_DBE_ENA_W1C (0x2310) +#define NIC_PF_ECC1_DBE_ENA_W1S (0x2318) +#define NIC_PF_ECC2_SBE_INT (0x2400) +#define NIC_PF_ECC2_SBE_INT_W1S (0x2408) +#define NIC_PF_ECC2_SBE_ENA_W1C (0x2410) +#define NIC_PF_ECC2_SBE_ENA_W1S (0x2418) +#define NIC_PF_ECC2_DBE_INT (0x2500) +#define NIC_PF_ECC2_DBE_INT_W1S (0x2508) +#define NIC_PF_ECC2_DBE_ENA_W1C (0x2510) +#define NIC_PF_ECC2_DBE_ENA_W1S (0x2518) +#define NIC_PF_ECC3_SBE_INT (0x2600) +#define NIC_PF_ECC3_SBE_INT_W1S (0x2608) +#define NIC_PF_ECC3_SBE_ENA_W1C (0x2610) +#define NIC_PF_ECC3_SBE_ENA_W1S (0x2618) +#define NIC_PF_ECC3_DBE_INT (0x2700) +#define NIC_PF_ECC3_DBE_INT_W1S (0x2708) +#define NIC_PF_ECC3_DBE_ENA_W1C (0x2710) +#define NIC_PF_ECC3_DBE_ENA_W1S (0x2718) +#define NIC_PF_CPI_0_2047_CFG (0x200000) +#define NIC_PF_RSSI_0_4097_RQ (0x220000) +#define NIC_PF_LMAC_0_7_CFG (0x240000) +#define NIC_PF_LMAC_0_7_SW_XOFF (0x242000) +#define NIC_PF_LMAC_0_7_CREDIT (0x244000) +#define NIC_PF_CHAN_0_255_TX_CFG (0x400000) +#define NIC_PF_CHAN_0_255_RX_CFG (0x420000) +#define NIC_PF_CHAN_0_255_SW_XOFF (0x440000) +#define NIC_PF_CHAN_0_255_CREDIT (0x460000) +#define NIC_PF_CHAN_0_255_RX_BP_CFG (0x480000) +#define NIC_PF_SW_SYNC_RX (0x490000) +#define NIC_PF_SW_SYNC_RX_DONE (0x490008) +#define NIC_PF_TL2_0_63_CFG (0x500000) +#define NIC_PF_TL2_0_63_PRI (0x520000) 
+#define NIC_PF_TL2_0_63_SH_STATUS (0x580000) +#define NIC_PF_TL3A_0_63_CFG (0x5F0000) +#define NIC_PF_TL3_0_255_CFG (0x600000) +#define NIC_PF_TL3_0_255_CHAN (0x620000) +#define NIC_PF_TL3_0_255_PIR (0x640000) +#define NIC_PF_TL3_0_255_SW_XOFF (0x660000) +#define NIC_PF_TL3_0_255_CNM_RATE (0x680000) +#define NIC_PF_TL3_0_255_SH_STATUS (0x6A0000) +#define NIC_PF_TL4A_0_255_CFG (0x6F0000) +#define NIC_PF_TL4_0_1023_CFG (0x800000) +#define NIC_PF_TL4_0_1023_SW_XOFF (0x820000) +#define NIC_PF_TL4_0_1023_SH_STATUS (0x840000) +#define NIC_PF_TL4A_0_1023_CNM_RATE (0x880000) +#define NIC_PF_TL4A_0_1023_CNM_STATUS (0x8A0000) +#define NIC_PF_VF_0_127_MAILBOX_0_1 (0x20002030) +#define NIC_PF_VNIC_0_127_TX_STAT_0_4 (0x20004000) +#define NIC_PF_VNIC_0_127_RX_STAT_0_13 (0x20004100) +#define NIC_PF_QSET_0_127_LOCK_0_15 (0x20006000) +#define NIC_PF_QSET_0_127_CFG (0x20010000) +#define NIC_PF_QSET_0_127_RQ_0_7_CFG (0x20010400) +#define NIC_PF_QSET_0_127_RQ_0_7_DROP_CFG (0x20010420) +#define NIC_PF_QSET_0_127_RQ_0_7_BP_CFG (0x20010500) +#define NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1 (0x20010600) +#define NIC_PF_QSET_0_127_SQ_0_7_CFG (0x20010C00) +#define NIC_PF_QSET_0_127_SQ_0_7_CFG2 (0x20010C08) +#define NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1 (0x20010D00) + +#define NIC_PF_MSIX_VEC_0_18_ADDR (0x000000) +#define NIC_PF_MSIX_VEC_0_CTL (0x000008) +#define NIC_PF_MSIX_PBA_0 (0x0F0000) + +/* Virtual function register offsets */ +#define NIC_VNIC_CFG (0x000020) +#define NIC_VF_PF_MAILBOX_0_1 (0x000130) +#define NIC_VF_INT (0x000200) +#define NIC_VF_INT_W1S (0x000220) +#define NIC_VF_ENA_W1C (0x000240) +#define NIC_VF_ENA_W1S (0x000260) + +#define NIC_VNIC_RSS_CFG (0x0020E0) +#define NIC_VNIC_RSS_KEY_0_4 (0x002200) +#define NIC_VNIC_TX_STAT_0_4 (0x004000) +#define NIC_VNIC_RX_STAT_0_13 (0x004100) +#define NIC_QSET_RQ_GEN_CFG (0x010010) + +#define NIC_QSET_CQ_0_7_CFG (0x010400) +#define NIC_QSET_CQ_0_7_CFG2 (0x010408) +#define NIC_QSET_CQ_0_7_THRESH (0x010410) +#define NIC_QSET_CQ_0_7_BASE (0x010420) +#define NIC_QSET_CQ_0_7_HEAD (0x010428) +#define NIC_QSET_CQ_0_7_TAIL (0x010430) +#define NIC_QSET_CQ_0_7_DOOR (0x010438) +#define NIC_QSET_CQ_0_7_STATUS (0x010440) +#define NIC_QSET_CQ_0_7_STATUS2 (0x010448) +#define NIC_QSET_CQ_0_7_DEBUG (0x010450) + +#define NIC_QSET_RQ_0_7_CFG (0x010600) +#define NIC_QSET_RQ_0_7_STAT_0_1 (0x010700) + +#define NIC_QSET_SQ_0_7_CFG (0x010800) +#define NIC_QSET_SQ_0_7_THRESH (0x010810) +#define NIC_QSET_SQ_0_7_BASE (0x010820) +#define NIC_QSET_SQ_0_7_HEAD (0x010828) +#define NIC_QSET_SQ_0_7_TAIL (0x010830) +#define NIC_QSET_SQ_0_7_DOOR (0x010838) +#define NIC_QSET_SQ_0_7_STATUS (0x010840) +#define NIC_QSET_SQ_0_7_DEBUG (0x010848) +#define NIC_QSET_SQ_0_7_CNM_CHG (0x010860) +#define NIC_QSET_SQ_0_7_STAT_0_1 (0x010900) + +#define NIC_QSET_RBDR_0_1_CFG (0x010C00) +#define NIC_QSET_RBDR_0_1_THRESH (0x010C10) +#define NIC_QSET_RBDR_0_1_BASE (0x010C20) +#define NIC_QSET_RBDR_0_1_HEAD (0x010C28) +#define NIC_QSET_RBDR_0_1_TAIL (0x010C30) +#define NIC_QSET_RBDR_0_1_DOOR (0x010C38) +#define NIC_QSET_RBDR_0_1_STATUS0 (0x010C40) +#define NIC_QSET_RBDR_0_1_STATUS1 (0x010C48) +#define NIC_QSET_RBDR_0_1_PREFETCH_STATUS (0x010C50) + +#define NIC_VF_MSIX_VECTOR_0_19_ADDR (0x000000) +#define NIC_VF_MSIX_VECTOR_0_19_CTL (0x000008) +#define NIC_VF_MSIX_PBA (0x0F0000) + +/* Offsets within registers */ +#define NIC_MSIX_VEC_SHIFT 4 +#define NIC_Q_NUM_SHIFT 18 +#define NIC_QS_ID_SHIFT 21 +#define NIC_VF_NUM_SHIFT 21 + +/* Port kind configuration register */ +struct pkind_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + 
u64 reserved_42_63:22; + u64 hdr_sl:5; /* Header skip length */ + u64 rx_hdr:3; /* TNS Receive header present */ + u64 lenerr_en:1;/* L2 length error check enable */ + u64 reserved_32_32:1; + u64 maxlen:16; /* Max frame size */ + u64 minlen:16; /* Min frame size */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 minlen:16; + u64 maxlen:16; + u64 reserved_32_32:1; + u64 lenerr_en:1; + u64 rx_hdr:3; + u64 hdr_sl:5; + u64 reserved_42_63:22; +#endif +}; + +#endif /* NIC_REG_H */ diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c new file mode 100644 index 000000000000..0fc4a536afc9 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c @@ -0,0 +1,601 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +/* ETHTOOL Support for VNIC_VF Device*/ + +#include <linux/pci.h> + +#include "nic_reg.h" +#include "nic.h" +#include "nicvf_queues.h" +#include "q_struct.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nicvf" +#define DRV_VERSION "1.0" + +struct nicvf_stat { + char name[ETH_GSTRING_LEN]; + unsigned int index; +}; + +#define NICVF_HW_STAT(stat) { \ + .name = #stat, \ + .index = offsetof(struct nicvf_hw_stats, stat) / sizeof(u64), \ +} + +#define NICVF_DRV_STAT(stat) { \ + .name = #stat, \ + .index = offsetof(struct nicvf_drv_stats, stat) / sizeof(u64), \ +} + +static const struct nicvf_stat nicvf_hw_stats[] = { + NICVF_HW_STAT(rx_bytes_ok), + NICVF_HW_STAT(rx_ucast_frames_ok), + NICVF_HW_STAT(rx_bcast_frames_ok), + NICVF_HW_STAT(rx_mcast_frames_ok), + NICVF_HW_STAT(rx_fcs_errors), + NICVF_HW_STAT(rx_l2_errors), + NICVF_HW_STAT(rx_drop_red), + NICVF_HW_STAT(rx_drop_red_bytes), + NICVF_HW_STAT(rx_drop_overrun), + NICVF_HW_STAT(rx_drop_overrun_bytes), + NICVF_HW_STAT(rx_drop_bcast), + NICVF_HW_STAT(rx_drop_mcast), + NICVF_HW_STAT(rx_drop_l3_bcast), + NICVF_HW_STAT(rx_drop_l3_mcast), + NICVF_HW_STAT(tx_bytes_ok), + NICVF_HW_STAT(tx_ucast_frames_ok), + NICVF_HW_STAT(tx_bcast_frames_ok), + NICVF_HW_STAT(tx_mcast_frames_ok), +}; + +static const struct nicvf_stat nicvf_drv_stats[] = { + NICVF_DRV_STAT(rx_frames_ok), + NICVF_DRV_STAT(rx_frames_64), + NICVF_DRV_STAT(rx_frames_127), + NICVF_DRV_STAT(rx_frames_255), + NICVF_DRV_STAT(rx_frames_511), + NICVF_DRV_STAT(rx_frames_1023), + NICVF_DRV_STAT(rx_frames_1518), + NICVF_DRV_STAT(rx_frames_jumbo), + NICVF_DRV_STAT(rx_drops), + NICVF_DRV_STAT(tx_frames_ok), + NICVF_DRV_STAT(tx_busy), + NICVF_DRV_STAT(tx_tso), + NICVF_DRV_STAT(tx_drops), +}; + +static const struct nicvf_stat nicvf_queue_stats[] = { + { "bytes", 0 }, + { "frames", 1 }, +}; + +static const unsigned int nicvf_n_hw_stats = ARRAY_SIZE(nicvf_hw_stats); +static const unsigned int nicvf_n_drv_stats = ARRAY_SIZE(nicvf_drv_stats); +static const unsigned int nicvf_n_queue_stats = ARRAY_SIZE(nicvf_queue_stats); + +static int nicvf_get_settings(struct net_device *netdev, + struct ethtool_cmd *cmd) +{ + struct nicvf *nic = netdev_priv(netdev); + + cmd->supported = 0; + cmd->transceiver = XCVR_EXTERNAL; + if (nic->speed <= 1000) { + cmd->port = PORT_MII; + cmd->autoneg = AUTONEG_ENABLE; + } else { + cmd->port = PORT_FIBRE; + cmd->autoneg = AUTONEG_DISABLE; + } + cmd->duplex = nic->duplex; + ethtool_cmd_speed_set(cmd, nic->speed); + + return 0; +} + +static void nicvf_get_drvinfo(struct net_device *netdev, + struct 
ethtool_drvinfo *info) +{ + struct nicvf *nic = netdev_priv(netdev); + + strlcpy(info->driver, DRV_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(nic->pdev), sizeof(info->bus_info)); +} + +static u32 nicvf_get_msglevel(struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + + return nic->msg_enable; +} + +static void nicvf_set_msglevel(struct net_device *netdev, u32 lvl) +{ + struct nicvf *nic = netdev_priv(netdev); + + nic->msg_enable = lvl; +} + +static void nicvf_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + int stats, qidx; + + if (sset != ETH_SS_STATS) + return; + + for (stats = 0; stats < nicvf_n_hw_stats; stats++) { + memcpy(data, nicvf_hw_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + for (stats = 0; stats < nicvf_n_drv_stats; stats++) { + memcpy(data, nicvf_drv_stats[stats].name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (stats = 0; stats < nicvf_n_queue_stats; stats++) { + sprintf(data, "rxq%d: %s", qidx, + nicvf_queue_stats[stats].name); + data += ETH_GSTRING_LEN; + } + } + + for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (stats = 0; stats < nicvf_n_queue_stats; stats++) { + sprintf(data, "txq%d: %s", qidx, + nicvf_queue_stats[stats].name); + data += ETH_GSTRING_LEN; + } + } + + for (stats = 0; stats < BGX_RX_STATS_COUNT; stats++) { + sprintf(data, "bgx_rxstat%d: ", stats); + data += ETH_GSTRING_LEN; + } + + for (stats = 0; stats < BGX_TX_STATS_COUNT; stats++) { + sprintf(data, "bgx_txstat%d: ", stats); + data += ETH_GSTRING_LEN; + } +} + +static int nicvf_get_sset_count(struct net_device *netdev, int sset) +{ + if (sset != ETH_SS_STATS) + return -EINVAL; + + return nicvf_n_hw_stats + nicvf_n_drv_stats + + (nicvf_n_queue_stats * + (MAX_RCV_QUEUES_PER_QS + MAX_SND_QUEUES_PER_QS)) + + BGX_RX_STATS_COUNT + BGX_TX_STATS_COUNT; +} + +static void nicvf_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct nicvf *nic = netdev_priv(netdev); + int stat, qidx; + + nicvf_update_stats(nic); + + /* Update LMAC stats */ + nicvf_update_lmac_stats(nic); + + for (stat = 0; stat < nicvf_n_hw_stats; stat++) + *(data++) = ((u64 *)&nic->stats) + [nicvf_hw_stats[stat].index]; + for (stat = 0; stat < nicvf_n_drv_stats; stat++) + *(data++) = ((u64 *)&nic->drv_stats) + [nicvf_drv_stats[stat].index]; + + for (qidx = 0; qidx < MAX_RCV_QUEUES_PER_QS; qidx++) { + for (stat = 0; stat < nicvf_n_queue_stats; stat++) + *(data++) = ((u64 *)&nic->qs->rq[qidx].stats) + [nicvf_queue_stats[stat].index]; + } + + for (qidx = 0; qidx < MAX_SND_QUEUES_PER_QS; qidx++) { + for (stat = 0; stat < nicvf_n_queue_stats; stat++) + *(data++) = ((u64 *)&nic->qs->sq[qidx].stats) + [nicvf_queue_stats[stat].index]; + } + + for (stat = 0; stat < BGX_RX_STATS_COUNT; stat++) + *(data++) = nic->bgx_stats.rx_stats[stat]; + for (stat = 0; stat < BGX_TX_STATS_COUNT; stat++) + *(data++) = nic->bgx_stats.tx_stats[stat]; +} + +static int nicvf_get_regs_len(struct net_device *dev) +{ + return sizeof(u64) * NIC_VF_REG_COUNT; +} + +static void nicvf_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *reg) +{ + struct nicvf *nic = netdev_priv(dev); + u64 *p = (u64 *)reg; + u64 reg_offset; + int mbox, key, stat, q; + int i = 0; + + regs->version = 0; + memset(p, 0, NIC_VF_REG_COUNT); + + p[i++] = nicvf_reg_read(nic, NIC_VNIC_CFG); + /* Mailbox registers */ + for 
(mbox = 0; mbox < NIC_PF_VF_MAILBOX_SIZE; mbox++) + p[i++] = nicvf_reg_read(nic, + NIC_VF_PF_MAILBOX_0_1 | (mbox << 3)); + + p[i++] = nicvf_reg_read(nic, NIC_VF_INT); + p[i++] = nicvf_reg_read(nic, NIC_VF_INT_W1S); + p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1C); + p[i++] = nicvf_reg_read(nic, NIC_VF_ENA_W1S); + p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG); + + for (key = 0; key < RSS_HASH_KEY_SIZE; key++) + p[i++] = nicvf_reg_read(nic, NIC_VNIC_RSS_KEY_0_4 | (key << 3)); + + /* Tx/Rx statistics */ + for (stat = 0; stat < TX_STATS_ENUM_LAST; stat++) + p[i++] = nicvf_reg_read(nic, + NIC_VNIC_TX_STAT_0_4 | (stat << 3)); + + for (stat = 0; stat < RX_STATS_ENUM_LAST; stat++) + p[i++] = nicvf_reg_read(nic, + NIC_VNIC_RX_STAT_0_13 | (stat << 3)); + + p[i++] = nicvf_reg_read(nic, NIC_QSET_RQ_GEN_CFG); + + /* All completion queues' registers */ + for (q = 0; q < MAX_CMP_QUEUES_PER_QS; q++) { + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_CFG2, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_THRESH, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_BASE, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_TAIL, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DOOR, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS2, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_DEBUG, q); + } + + /* All receive queues' registers */ + for (q = 0; q < MAX_RCV_QUEUES_PER_QS; q++) { + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RQ_0_7_CFG, q); + p[i++] = nicvf_queue_reg_read(nic, + NIC_QSET_RQ_0_7_STAT_0_1, q); + reg_offset = NIC_QSET_RQ_0_7_STAT_0_1 | (1 << 3); + p[i++] = nicvf_queue_reg_read(nic, reg_offset, q); + } + + for (q = 0; q < MAX_SND_QUEUES_PER_QS; q++) { + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_THRESH, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_BASE, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DOOR, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STATUS, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_DEBUG, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CNM_CHG, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_STAT_0_1, q); + reg_offset = NIC_QSET_SQ_0_7_STAT_0_1 | (1 << 3); + p[i++] = nicvf_queue_reg_read(nic, reg_offset, q); + } + + for (q = 0; q < MAX_RCV_BUF_DESC_RINGS_PER_QS; q++) { + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_CFG, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_THRESH, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_BASE, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_HEAD, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, q); + p[i++] = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_DOOR, q); + p[i++] = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_STATUS0, q); + p[i++] = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_STATUS1, q); + reg_offset = NIC_QSET_RBDR_0_1_PREFETCH_STATUS; + p[i++] = nicvf_queue_reg_read(nic, reg_offset, q); + } +} + +static int nicvf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *cmd) +{ + struct nicvf *nic = netdev_priv(netdev); + + cmd->rx_coalesce_usecs = nic->cq_coalesce_usecs; +
return 0; +} + +static void nicvf_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + + ring->rx_max_pending = MAX_RCV_BUF_COUNT; + ring->rx_pending = qs->rbdr_len; + ring->tx_max_pending = MAX_SND_QUEUE_LEN; + ring->tx_pending = qs->sq_len; +} + +static int nicvf_get_rss_hash_opts(struct nicvf *nic, + struct ethtool_rxnfc *info) +{ + info->data = 0; + + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + case UDP_V4_FLOW: + case UDP_V6_FLOW: + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case IPV4_FLOW: + case IPV6_FLOW: + info->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int nicvf_get_rxnfc(struct net_device *dev, + struct ethtool_rxnfc *info, u32 *rules) +{ + struct nicvf *nic = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = nic->qs->rq_cnt; + ret = 0; + break; + case ETHTOOL_GRXFH: + return nicvf_get_rss_hash_opts(nic, info); + default: + break; + } + return ret; +} + +static int nicvf_set_rss_hash_opts(struct nicvf *nic, + struct ethtool_rxnfc *info) +{ + struct nicvf_rss_info *rss = &nic->rss_info; + u64 rss_cfg = nicvf_reg_read(nic, NIC_VNIC_RSS_CFG); + + if (!rss->enable) + netdev_err(nic->netdev, + "RSS is disabled, hash cannot be set\n"); + + netdev_info(nic->netdev, "Set RSS flow type = %d, data = %lld\n", + info->flow_type, info->data); + + if (!(info->data & RXH_IP_SRC) || !(info->data & RXH_IP_DST)) + return -EINVAL; + + switch (info->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_cfg &= ~(1ULL << RSS_HASH_TCP); + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= (1ULL << RSS_HASH_TCP); + break; + default: + return -EINVAL; + } + break; + case UDP_V4_FLOW: + case UDP_V6_FLOW: + switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_cfg &= ~(1ULL << RSS_HASH_UDP); + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= (1ULL << RSS_HASH_UDP); + break; + default: + return -EINVAL; + } + break; + case SCTP_V4_FLOW: + case SCTP_V6_FLOW: + switch (info->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + rss_cfg &= ~(1ULL << RSS_HASH_L4ETC); + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + rss_cfg |= (1ULL << RSS_HASH_L4ETC); + break; + default: + return -EINVAL; + } + break; + case IPV4_FLOW: + case IPV6_FLOW: + rss_cfg = RSS_HASH_IP; + break; + default: + return -EINVAL; + } + + nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss_cfg); + return 0; +} + +static int nicvf_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info) +{ + struct nicvf *nic = netdev_priv(dev); + + switch (info->cmd) { + case ETHTOOL_SRXFH: + return nicvf_set_rss_hash_opts(nic, info); + default: + break; + } + return -EOPNOTSUPP; +} + +static u32 nicvf_get_rxfh_key_size(struct net_device *netdev) +{ + return RSS_HASH_KEY_SIZE * sizeof(u64); +} + +static u32 nicvf_get_rxfh_indir_size(struct net_device *dev) +{ + struct nicvf *nic = netdev_priv(dev); + + return nic->rss_info.rss_size; +} + +static int nicvf_get_rxfh(struct net_device *dev, u32 *indir, u8 *hkey, + u8 *hfunc) +{ + struct nicvf *nic = netdev_priv(dev); + struct nicvf_rss_info *rss = &nic->rss_info; + int idx; + + if (indir) { + for (idx = 0; idx < rss->rss_size; idx++) + indir[idx] = rss->ind_tbl[idx]; + } + + if (hkey) + memcpy(hkey, rss->key, RSS_HASH_KEY_SIZE * 
sizeof(u64)); + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; + + return 0; +} + +static int nicvf_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *hkey, u8 hfunc) +{ + struct nicvf *nic = netdev_priv(dev); + struct nicvf_rss_info *rss = &nic->rss_info; + int idx; + + if ((nic->qs->rq_cnt <= 1) || (nic->cpi_alg != CPI_ALG_NONE)) { + rss->enable = false; + rss->hash_bits = 0; + return -EIO; + } + + /* We do not allow change in unsupported parameters */ + if (hkey || + (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)) + return -EOPNOTSUPP; + + rss->enable = true; + if (indir) { + for (idx = 0; idx < rss->rss_size; idx++) + rss->ind_tbl[idx] = indir[idx]; + } + + if (hkey) { + memcpy(rss->key, hkey, RSS_HASH_KEY_SIZE * sizeof(u64)); + nicvf_set_rss_key(nic); + } + + nicvf_config_rss(nic); + return 0; +} + +/* Get no of queues device supports and current queue count */ +static void nicvf_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct nicvf *nic = netdev_priv(dev); + + memset(channel, 0, sizeof(*channel)); + + channel->max_rx = MAX_RCV_QUEUES_PER_QS; + channel->max_tx = MAX_SND_QUEUES_PER_QS; + + channel->rx_count = nic->qs->rq_cnt; + channel->tx_count = nic->qs->sq_cnt; +} + +/* Set no of Tx, Rx queues to be used */ +static int nicvf_set_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct nicvf *nic = netdev_priv(dev); + int err = 0; + + if (!channel->rx_count || !channel->tx_count) + return -EINVAL; + if (channel->rx_count > MAX_RCV_QUEUES_PER_QS) + return -EINVAL; + if (channel->tx_count > MAX_SND_QUEUES_PER_QS) + return -EINVAL; + + nic->qs->rq_cnt = channel->rx_count; + nic->qs->sq_cnt = channel->tx_count; + nic->qs->cq_cnt = max(nic->qs->rq_cnt, nic->qs->sq_cnt); + + err = nicvf_set_real_num_queues(dev, nic->qs->sq_cnt, nic->qs->rq_cnt); + if (err) + return err; + + if (!netif_running(dev)) + return err; + + nicvf_stop(dev); + nicvf_open(dev); + netdev_info(dev, "Setting num Tx rings to %d, Rx rings to %d success\n", + nic->qs->sq_cnt, nic->qs->rq_cnt); + + return err; +} + +static const struct ethtool_ops nicvf_ethtool_ops = { + .get_settings = nicvf_get_settings, + .get_link = ethtool_op_get_link, + .get_drvinfo = nicvf_get_drvinfo, + .get_msglevel = nicvf_get_msglevel, + .set_msglevel = nicvf_set_msglevel, + .get_strings = nicvf_get_strings, + .get_sset_count = nicvf_get_sset_count, + .get_ethtool_stats = nicvf_get_ethtool_stats, + .get_regs_len = nicvf_get_regs_len, + .get_regs = nicvf_get_regs, + .get_coalesce = nicvf_get_coalesce, + .get_ringparam = nicvf_get_ringparam, + .get_rxnfc = nicvf_get_rxnfc, + .set_rxnfc = nicvf_set_rxnfc, + .get_rxfh_key_size = nicvf_get_rxfh_key_size, + .get_rxfh_indir_size = nicvf_get_rxfh_indir_size, + .get_rxfh = nicvf_get_rxfh, + .set_rxfh = nicvf_set_rxfh, + .get_channels = nicvf_get_channels, + .set_channels = nicvf_set_channels, + .get_ts_info = ethtool_op_get_ts_info, +}; + +void nicvf_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &nicvf_ethtool_ops; +} diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c new file mode 100644 index 000000000000..abd446e6155b --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -0,0 +1,1332 @@ +/* + * Copyright (C) 2015 Cavium, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/log2.h> +#include <linux/prefetch.h> +#include <linux/irq.h> + +#include "nic_reg.h" +#include "nic.h" +#include "nicvf_queues.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-nicvf" +#define DRV_VERSION "1.0" + +/* Supported devices */ +static const struct pci_device_id nicvf_id_table[] = { + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, + PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Sunil Goutham"); +MODULE_DESCRIPTION("Cavium Thunder NIC Virtual Function Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, nicvf_id_table); + +static int debug = 0x00; +module_param(debug, int, 0644); +MODULE_PARM_DESC(debug, "Debug message level bitmap"); + +static int cpi_alg = CPI_ALG_NONE; +module_param(cpi_alg, int, S_IRUGO); +MODULE_PARM_DESC(cpi_alg, + "PFC algorithm (0=none, 1=VLAN, 2=VLAN16, 3=IP Diffserv)"); + +static int nicvf_enable_msix(struct nicvf *nic); +static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev); +static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx); + +static inline void nicvf_set_rx_frame_cnt(struct nicvf *nic, + struct sk_buff *skb) +{ + if (skb->len <= 64) + nic->drv_stats.rx_frames_64++; + else if (skb->len <= 127) + nic->drv_stats.rx_frames_127++; + else if (skb->len <= 255) + nic->drv_stats.rx_frames_255++; + else if (skb->len <= 511) + nic->drv_stats.rx_frames_511++; + else if (skb->len <= 1023) + nic->drv_stats.rx_frames_1023++; + else if (skb->len <= 1518) + nic->drv_stats.rx_frames_1518++; + else + nic->drv_stats.rx_frames_jumbo++; +} + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. 
+ */ + +/* Register read/write APIs */ +void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val) +{ + writeq_relaxed(val, nic->reg_base + offset); +} + +u64 nicvf_reg_read(struct nicvf *nic, u64 offset) +{ + return readq_relaxed(nic->reg_base + offset); +} + +void nicvf_queue_reg_write(struct nicvf *nic, u64 offset, + u64 qidx, u64 val) +{ + void __iomem *addr = nic->reg_base + offset; + + writeq_relaxed(val, addr + (qidx << NIC_Q_NUM_SHIFT)); +} + +u64 nicvf_queue_reg_read(struct nicvf *nic, u64 offset, u64 qidx) +{ + void __iomem *addr = nic->reg_base + offset; + + return readq_relaxed(addr + (qidx << NIC_Q_NUM_SHIFT)); +} + +/* VF -> PF mailbox communication */ + +int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) +{ + int timeout = NIC_MBOX_MSG_TIMEOUT; + int sleep = 10; + u64 *msg = (u64 *)mbx; + + nic->pf_acked = false; + nic->pf_nacked = false; + + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]); + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]); + + /* Wait for previous message to be acked, timeout 2sec */ + while (!nic->pf_acked) { + if (nic->pf_nacked) + return -EINVAL; + msleep(sleep); + if (nic->pf_acked) + break; + timeout -= sleep; + if (!timeout) { + netdev_err(nic->netdev, + "PF didn't ack to mbox msg %d from VF%d\n", + (mbx->msg.msg & 0xFF), nic->vf_id); + return -EBUSY; + } + } + return 0; +} + +/* Checks if VF is able to comminicate with PF +* and also gets the VNIC number this VF is associated to. +*/ +static int nicvf_check_pf_ready(struct nicvf *nic) +{ + int timeout = 5000, sleep = 20; + + nic->pf_ready_to_rcv_msg = false; + + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, + le64_to_cpu(NIC_MBOX_MSG_READY)); + nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, 1ULL); + + while (!nic->pf_ready_to_rcv_msg) { + msleep(sleep); + if (nic->pf_ready_to_rcv_msg) + break; + timeout -= sleep; + if (!timeout) { + netdev_err(nic->netdev, + "PF didn't respond to READY msg\n"); + return 0; + } + } + return 1; +} + +static void nicvf_handle_mbx_intr(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + u64 *mbx_data; + u64 mbx_addr; + int i; + + mbx_addr = NIC_VF_PF_MAILBOX_0_1; + mbx_data = (u64 *)&mbx; + + for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) { + *mbx_data = nicvf_reg_read(nic, mbx_addr); + mbx_data++; + mbx_addr += sizeof(u64); + } + + netdev_dbg(nic->netdev, "Mbox message: msg: 0x%x\n", mbx.msg.msg); + switch (mbx.msg.msg) { + case NIC_MBOX_MSG_READY: + nic->pf_ready_to_rcv_msg = true; + nic->vf_id = mbx.nic_cfg.vf_id & 0x7F; + nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F; + nic->node = mbx.nic_cfg.node_id; + ether_addr_copy(nic->netdev->dev_addr, + (u8 *)&mbx.nic_cfg.mac_addr); + nic->link_up = false; + nic->duplex = 0; + nic->speed = 0; + break; + case NIC_MBOX_MSG_ACK: + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_NACK: + nic->pf_nacked = true; + break; + case NIC_MBOX_MSG_RSS_SIZE: + nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size; + nic->pf_acked = true; + break; + case NIC_MBOX_MSG_BGX_STATS: + nicvf_read_bgx_stats(nic, &mbx.bgx_stats); + nic->pf_acked = true; + nic->bgx_stats_acked = true; + break; + case NIC_MBOX_MSG_BGX_LINK_CHANGE: + nic->pf_acked = true; + nic->link_up = mbx.link_status.link_up; + nic->duplex = mbx.link_status.duplex; + nic->speed = mbx.link_status.speed; + if (nic->link_up) { + netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n", + nic->netdev->name, nic->speed, + nic->duplex == DUPLEX_FULL ? 
+ "Full duplex" : "Half duplex"); + netif_carrier_on(nic->netdev); + netif_tx_wake_all_queues(nic->netdev); + } else { + netdev_info(nic->netdev, "%s: Link is Down\n", + nic->netdev->name); + netif_carrier_off(nic->netdev); + netif_tx_stop_all_queues(nic->netdev); + } + break; + default: + netdev_err(nic->netdev, + "Invalid message from PF, msg 0x%x\n", mbx.msg.msg); + break; + } + nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0); +} + +static int nicvf_hw_set_mac_addr(struct nicvf *nic, struct net_device *netdev) +{ + union nic_mbx mbx = {}; + int i; + + mbx.mac.msg = NIC_MBOX_MSG_SET_MAC; + mbx.mac.vf_id = nic->vf_id; + for (i = 0; i < ETH_ALEN; i++) + mbx.mac.addr = (mbx.mac.addr << 8) | + netdev->dev_addr[i]; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + +void nicvf_config_cpi(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG; + mbx.cpi_cfg.vf_id = nic->vf_id; + mbx.cpi_cfg.cpi_alg = nic->cpi_alg; + mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt; + + nicvf_send_msg_to_pf(nic, &mbx); +} + +void nicvf_get_rss_size(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; + mbx.rss_size.vf_id = nic->vf_id; + nicvf_send_msg_to_pf(nic, &mbx); +} + +void nicvf_config_rss(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + struct nicvf_rss_info *rss = &nic->rss_info; + int ind_tbl_len = rss->rss_size; + int i, nextq = 0; + + mbx.rss_cfg.vf_id = nic->vf_id; + mbx.rss_cfg.hash_bits = rss->hash_bits; + while (ind_tbl_len) { + mbx.rss_cfg.tbl_offset = nextq; + mbx.rss_cfg.tbl_len = min(ind_tbl_len, + RSS_IND_TBL_LEN_PER_MBX_MSG); + mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ? + NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG; + + for (i = 0; i < mbx.rss_cfg.tbl_len; i++) + mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++]; + + nicvf_send_msg_to_pf(nic, &mbx); + + ind_tbl_len -= mbx.rss_cfg.tbl_len; + } +} + +void nicvf_set_rss_key(struct nicvf *nic) +{ + struct nicvf_rss_info *rss = &nic->rss_info; + u64 key_addr = NIC_VNIC_RSS_KEY_0_4; + int idx; + + for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) { + nicvf_reg_write(nic, key_addr, rss->key[idx]); + key_addr += sizeof(u64); + } +} + +static int nicvf_rss_init(struct nicvf *nic) +{ + struct nicvf_rss_info *rss = &nic->rss_info; + int idx; + + nicvf_get_rss_size(nic); + + if ((nic->qs->rq_cnt <= 1) || (cpi_alg != CPI_ALG_NONE)) { + rss->enable = false; + rss->hash_bits = 0; + return 0; + } + + rss->enable = true; + + /* Using the HW reset value for now */ + rss->key[0] = 0xFEED0BADFEED0BAD; + rss->key[1] = 0xFEED0BADFEED0BAD; + rss->key[2] = 0xFEED0BADFEED0BAD; + rss->key[3] = 0xFEED0BADFEED0BAD; + rss->key[4] = 0xFEED0BADFEED0BAD; + + nicvf_set_rss_key(nic); + + rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; + nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg); + + rss->hash_bits = ilog2(rounddown_pow_of_two(rss->rss_size)); + + for (idx = 0; idx < rss->rss_size; idx++) + rss->ind_tbl[idx] = ethtool_rxfh_indir_default(idx, + nic->qs->rq_cnt); + nicvf_config_rss(nic); + return 1; +} + +int nicvf_set_real_num_queues(struct net_device *netdev, + int tx_queues, int rx_queues) +{ + int err = 0; + + err = netif_set_real_num_tx_queues(netdev, tx_queues); + if (err) { + netdev_err(netdev, + "Failed to set no of Tx queues: %d\n", tx_queues); + return err; + } + + err = netif_set_real_num_rx_queues(netdev, rx_queues); + if (err) + netdev_err(netdev, + "Failed to set no of Rx queues: %d\n", rx_queues); + return err; +} + +static int nicvf_init_resources(struct nicvf *nic) +{ + int err; 
+ u64 mbx_addr = NIC_VF_PF_MAILBOX_0_1; + + /* Enable Qset */ + nicvf_qset_config(nic, true); + + /* Initialize queues and HW for data transfer */ + err = nicvf_config_data_transfer(nic, true); + if (err) { + netdev_err(nic->netdev, + "Failed to alloc/config VF's QSet resources\n"); + return err; + } + + /* Send VF config done msg to PF */ + nicvf_reg_write(nic, mbx_addr, le64_to_cpu(NIC_MBOX_MSG_CFG_DONE)); + mbx_addr += (NIC_PF_VF_MAILBOX_SIZE - 1) * 8; + nicvf_reg_write(nic, mbx_addr, 1ULL); + + return 0; +} + +static void nicvf_snd_pkt_handler(struct net_device *netdev, + struct cmp_queue *cq, + struct cqe_send_t *cqe_tx, int cqe_type) +{ + struct sk_buff *skb = NULL; + struct nicvf *nic = netdev_priv(netdev); + struct snd_queue *sq; + struct sq_hdr_subdesc *hdr; + + sq = &nic->qs->sq[cqe_tx->sq_idx]; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, cqe_tx->sqe_ptr); + if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) + return; + + netdev_dbg(nic->netdev, + "%s Qset #%d SQ #%d SQ ptr #%d subdesc count %d\n", + __func__, cqe_tx->sq_qs, cqe_tx->sq_idx, + cqe_tx->sqe_ptr, hdr->subdesc_cnt); + + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + nicvf_check_cqe_tx_errs(nic, cq, cqe_tx); + skb = (struct sk_buff *)sq->skbuff[cqe_tx->sqe_ptr]; + /* For TSO offloaded packets only one head SKB needs to be freed */ + if (skb) { + prefetch(skb); + dev_consume_skb_any(skb); + } +} + +static void nicvf_rcv_pkt_handler(struct net_device *netdev, + struct napi_struct *napi, + struct cmp_queue *cq, + struct cqe_rx_t *cqe_rx, int cqe_type) +{ + struct sk_buff *skb; + struct nicvf *nic = netdev_priv(netdev); + int err = 0; + + /* Check for errors */ + err = nicvf_check_cqe_rx_errs(nic, cq, cqe_rx); + if (err && !cqe_rx->rb_cnt) + return; + + skb = nicvf_get_rcv_skb(nic, cqe_rx); + if (!skb) { + netdev_dbg(nic->netdev, "Packet not received\n"); + return; + } + + if (netif_msg_pktdata(nic)) { + netdev_info(nic->netdev, "%s: skb 0x%p, len=%d\n", netdev->name, + skb, skb->len); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_OFFSET, 16, 1, + skb->data, skb->len, true); + } + + nicvf_set_rx_frame_cnt(nic, skb); + + skb_record_rx_queue(skb, cqe_rx->rq_idx); + if (netdev->hw_features & NETIF_F_RXCSUM) { + /* HW by default verifies TCP/UDP/SCTP checksums */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else { + skb_checksum_none_assert(skb); + } + + skb->protocol = eth_type_trans(skb, netdev); + + if (napi && (netdev->features & NETIF_F_GRO)) + napi_gro_receive(napi, skb); + else + netif_receive_skb(skb); +} + +static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, + struct napi_struct *napi, int budget) +{ + int processed_cqe, work_done = 0; + int cqe_count, cqe_head; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct cmp_queue *cq = &qs->cq[cq_idx]; + struct cqe_rx_t *cq_desc; + + spin_lock_bh(&cq->lock); +loop: + processed_cqe = 0; + /* Get no of valid CQ entries to process */ + cqe_count = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, cq_idx); + cqe_count &= CQ_CQE_COUNT; + if (!cqe_count) + goto done; + + /* Get head of the valid CQ entries */ + cqe_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, cq_idx) >> 9; + cqe_head &= 0xFFFF; + + netdev_dbg(nic->netdev, "%s cqe_count %d cqe_head %d\n", + __func__, cqe_count, cqe_head); + while (processed_cqe < cqe_count) { + /* Get the CQ descriptor */ + cq_desc = (struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head); + cqe_head++; + cqe_head &= (cq->dmem.q_len - 1); + /* Initiate prefetch for next descriptor */ + 
prefetch((struct cqe_rx_t *)GET_CQ_DESC(cq, cqe_head)); + + if ((work_done >= budget) && napi && + (cq_desc->cqe_type != CQE_TYPE_SEND)) { + break; + } + + netdev_dbg(nic->netdev, "cq_desc->cqe_type %d\n", + cq_desc->cqe_type); + switch (cq_desc->cqe_type) { + case CQE_TYPE_RX: + nicvf_rcv_pkt_handler(netdev, napi, cq, + cq_desc, CQE_TYPE_RX); + work_done++; + break; + case CQE_TYPE_SEND: + nicvf_snd_pkt_handler(netdev, cq, + (void *)cq_desc, CQE_TYPE_SEND); + break; + case CQE_TYPE_INVALID: + case CQE_TYPE_RX_SPLIT: + case CQE_TYPE_RX_TCP: + case CQE_TYPE_SEND_PTP: + /* Ignore for now */ + break; + } + processed_cqe++; + } + netdev_dbg(nic->netdev, "%s processed_cqe %d work_done %d budget %d\n", + __func__, processed_cqe, work_done, budget); + + /* Ring doorbell to inform H/W to reuse processed CQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_DOOR, + cq_idx, processed_cqe); + + if ((work_done < budget) && napi) + goto loop; + +done: + spin_unlock_bh(&cq->lock); + return work_done; +} + +static int nicvf_poll(struct napi_struct *napi, int budget) +{ + u64 cq_head; + int work_done = 0; + struct net_device *netdev = napi->dev; + struct nicvf *nic = netdev_priv(netdev); + struct nicvf_cq_poll *cq; + struct netdev_queue *txq; + + cq = container_of(napi, struct nicvf_cq_poll, napi); + work_done = nicvf_cq_intr_handler(netdev, cq->cq_idx, napi, budget); + + txq = netdev_get_tx_queue(netdev, cq->cq_idx); + if (netif_tx_queue_stopped(txq)) + netif_tx_wake_queue(txq); + + if (work_done < budget) { + /* Slow packet rate, exit polling */ + napi_complete(napi); + /* Re-enable interrupts */ + cq_head = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_HEAD, + cq->cq_idx); + nicvf_clear_intr(nic, NICVF_INTR_CQ, cq->cq_idx); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_HEAD, + cq->cq_idx, cq_head); + nicvf_enable_intr(nic, NICVF_INTR_CQ, cq->cq_idx); + } + return work_done; +} + +/* Qset error interrupt handler + * + * As of now only CQ errors are handled + */ +void nicvf_handle_qs_err(unsigned long data) +{ + struct nicvf *nic = (struct nicvf *)data; + struct queue_set *qs = nic->qs; + int qidx; + u64 status; + + netif_tx_disable(nic->netdev); + + /* Check if it is CQ err */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + status = nicvf_queue_reg_read(nic, NIC_QSET_CQ_0_7_STATUS, + qidx); + if (!(status & CQ_ERR_MASK)) + continue; + /* Process already queued CQEs and reconfig CQ */ + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + nicvf_sq_disable(nic, qidx); + nicvf_cq_intr_handler(nic->netdev, qidx, NULL, 0); + nicvf_cmp_queue_config(nic, qs, qidx, true); + nicvf_sq_free_used_descs(nic->netdev, &qs->sq[qidx], qidx); + nicvf_sq_enable(nic, &qs->sq[qidx], qidx); + + nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); + } + + netif_tx_start_all_queues(nic->netdev); + /* Re-enable Qset error interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); +} + +static irqreturn_t nicvf_misc_intr_handler(int irq, void *nicvf_irq) +{ + struct nicvf *nic = (struct nicvf *)nicvf_irq; + u64 intr; + + intr = nicvf_reg_read(nic, NIC_VF_INT); + /* Check for spurious interrupt */ + if (!(intr & NICVF_INTR_MBOX_MASK)) + return IRQ_HANDLED; + + nicvf_handle_mbx_intr(nic); + + return IRQ_HANDLED; +} + +static irqreturn_t nicvf_intr_handler(int irq, void *nicvf_irq) +{ + u64 qidx, intr, clear_intr = 0; + u64 cq_intr, rbdr_intr, qs_err_intr; + struct nicvf *nic = (struct nicvf *)nicvf_irq; + struct queue_set *qs = nic->qs; + struct nicvf_cq_poll *cq_poll = NULL; + + intr = nicvf_reg_read(nic, NIC_VF_INT); + if (netif_msg_intr(nic)) + 
netdev_info(nic->netdev, "%s: interrupt status 0x%llx\n", + nic->netdev->name, intr); + + qs_err_intr = intr & NICVF_INTR_QS_ERR_MASK; + if (qs_err_intr) { + /* Disable Qset err interrupt and schedule softirq */ + nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); + tasklet_hi_schedule(&nic->qs_err_task); + clear_intr |= qs_err_intr; + } + + /* Disable interrupts and start polling */ + cq_intr = (intr & NICVF_INTR_CQ_MASK) >> NICVF_INTR_CQ_SHIFT; + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + if (!(cq_intr & (1 << qidx))) + continue; + if (!nicvf_is_intr_enabled(nic, NICVF_INTR_CQ, qidx)) + continue; + + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + clear_intr |= ((1 << qidx) << NICVF_INTR_CQ_SHIFT); + + cq_poll = nic->napi[qidx]; + /* Schedule NAPI */ + if (cq_poll) + napi_schedule(&cq_poll->napi); + } + + /* Handle RBDR interrupts */ + rbdr_intr = (intr & NICVF_INTR_RBDR_MASK) >> NICVF_INTR_RBDR_SHIFT; + if (rbdr_intr) { + /* Disable RBDR interrupt and schedule softirq */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx)) + continue; + nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); + tasklet_hi_schedule(&nic->rbdr_task); + clear_intr |= ((1 << qidx) << NICVF_INTR_RBDR_SHIFT); + } + } + + /* Clear interrupts */ + nicvf_reg_write(nic, NIC_VF_INT, clear_intr); + return IRQ_HANDLED; +} + +static int nicvf_enable_msix(struct nicvf *nic) +{ + int ret, vec; + + nic->num_vec = NIC_VF_MSIX_VECTORS; + + for (vec = 0; vec < nic->num_vec; vec++) + nic->msix_entries[vec].entry = vec; + + ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); + if (ret) { + netdev_err(nic->netdev, + "Req for #%d msix vectors failed\n", nic->num_vec); + return 0; + } + nic->msix_enabled = 1; + return 1; +} + +static void nicvf_disable_msix(struct nicvf *nic) +{ + if (nic->msix_enabled) { + pci_disable_msix(nic->pdev); + nic->msix_enabled = 0; + nic->num_vec = 0; + } +} + +static int nicvf_register_interrupts(struct nicvf *nic) +{ + int irq, free, ret = 0; + int vector; + + for_each_cq_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d CQ%d", + nic->vf_id, irq); + + for_each_sq_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d SQ%d", + nic->vf_id, irq - NICVF_INTR_ID_SQ); + + for_each_rbdr_irq(irq) + sprintf(nic->irq_name[irq], "NICVF%d RBDR%d", + nic->vf_id, irq - NICVF_INTR_ID_RBDR); + + /* Register all interrupts except mailbox */ + for (irq = 0; irq < NICVF_INTR_ID_SQ; irq++) { + vector = nic->msix_entries[irq].vector; + ret = request_irq(vector, nicvf_intr_handler, + 0, nic->irq_name[irq], nic); + if (ret) + break; + nic->irq_allocated[irq] = true; + } + + for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_MISC; irq++) { + vector = nic->msix_entries[irq].vector; + ret = request_irq(vector, nicvf_intr_handler, + 0, nic->irq_name[irq], nic); + if (ret) + break; + nic->irq_allocated[irq] = true; + } + + sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], + "NICVF%d Qset error", nic->vf_id); + if (!ret) { + vector = nic->msix_entries[NICVF_INTR_ID_QS_ERR].vector; + irq = NICVF_INTR_ID_QS_ERR; + ret = request_irq(vector, nicvf_intr_handler, + 0, nic->irq_name[irq], nic); + if (!ret) + nic->irq_allocated[irq] = true; + } + + if (ret) { + netdev_err(nic->netdev, "Request irq failed\n"); + for (free = 0; free < irq; free++) + free_irq(nic->msix_entries[free].vector, nic); + return ret; + } + + return 0; +} + +static void nicvf_unregister_interrupts(struct nicvf *nic) +{ + int irq; + + /* Free registered interrupts */ + for (irq = 0; irq < nic->num_vec; irq++) { + if 
(nic->irq_allocated[irq]) + free_irq(nic->msix_entries[irq].vector, nic); + nic->irq_allocated[irq] = false; + } + + /* Disable MSI-X */ + nicvf_disable_msix(nic); +} + +/* Initialize MSIX vectors and register MISC interrupt. + * Send READY message to PF to check if its alive + */ +static int nicvf_register_misc_interrupt(struct nicvf *nic) +{ + int ret = 0; + int irq = NICVF_INTR_ID_MISC; + + /* Return if mailbox interrupt is already registered */ + if (nic->msix_enabled) + return 0; + + /* Enable MSI-X */ + if (!nicvf_enable_msix(nic)) + return 1; + + sprintf(nic->irq_name[irq], "%s Mbox", "NICVF"); + /* Register Misc interrupt */ + ret = request_irq(nic->msix_entries[irq].vector, + nicvf_misc_intr_handler, 0, nic->irq_name[irq], nic); + + if (ret) + return ret; + nic->irq_allocated[irq] = true; + + /* Enable mailbox interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0); + + /* Check if VF is able to communicate with PF */ + if (!nicvf_check_pf_ready(nic)) { + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + nicvf_unregister_interrupts(nic); + return 1; + } + + return 0; +} + +static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct nicvf *nic = netdev_priv(netdev); + int qid = skb_get_queue_mapping(skb); + struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid); + + /* Check for minimum packet length */ + if (skb->len <= ETH_HLEN) { + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + + if (!nicvf_sq_append_skb(nic, skb) && !netif_tx_queue_stopped(txq)) { + netif_tx_stop_queue(txq); + nic->drv_stats.tx_busy++; + if (netif_msg_tx_err(nic)) + netdev_warn(netdev, + "%s: Transmit ring full, stopping SQ%d\n", + netdev->name, qid); + + return NETDEV_TX_BUSY; + } + + return NETDEV_TX_OK; +} + +int nicvf_stop(struct net_device *netdev) +{ + int irq, qidx; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct nicvf_cq_poll *cq_poll = NULL; + union nic_mbx mbx = {}; + + mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN; + nicvf_send_msg_to_pf(nic, &mbx); + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + /* Disable RBDR & QS error interrupts */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx); + nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx); + } + nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0); + nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0); + + /* Wait for pending IRQ handlers to finish */ + for (irq = 0; irq < nic->num_vec; irq++) + synchronize_irq(nic->msix_entries[irq].vector); + + tasklet_kill(&nic->rbdr_task); + tasklet_kill(&nic->qs_err_task); + if (nic->rb_work_scheduled) + cancel_delayed_work_sync(&nic->rbdr_work); + + for (qidx = 0; qidx < nic->qs->cq_cnt; qidx++) { + cq_poll = nic->napi[qidx]; + if (!cq_poll) + continue; + nic->napi[qidx] = NULL; + napi_synchronize(&cq_poll->napi); + /* CQ intr is enabled while napi_complete, + * so disable it now + */ + nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx); + nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx); + napi_disable(&cq_poll->napi); + netif_napi_del(&cq_poll->napi); + kfree(cq_poll); + } + + /* Free resources */ + nicvf_config_data_transfer(nic, false); + + /* Disable HW Qset */ + nicvf_qset_config(nic, false); + + /* disable mailbox interrupt */ + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + + nicvf_unregister_interrupts(nic); + + return 0; +} + +int nicvf_open(struct net_device *netdev) +{ + int err, qidx; + struct nicvf *nic = netdev_priv(netdev); + struct queue_set *qs = nic->qs; + struct nicvf_cq_poll *cq_poll = NULL; + + 
nic->mtu = netdev->mtu; + + netif_carrier_off(netdev); + + err = nicvf_register_misc_interrupt(nic); + if (err) + return err; + + /* Register NAPI handler for processing CQEs */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + cq_poll = kzalloc(sizeof(*cq_poll), GFP_KERNEL); + if (!cq_poll) { + err = -ENOMEM; + goto napi_del; + } + cq_poll->cq_idx = qidx; + netif_napi_add(netdev, &cq_poll->napi, nicvf_poll, + NAPI_POLL_WEIGHT); + napi_enable(&cq_poll->napi); + nic->napi[qidx] = cq_poll; + } + + /* Check if we got MAC address from PF or else generate a random MAC */ + if (is_zero_ether_addr(netdev->dev_addr)) { + eth_hw_addr_random(netdev); + nicvf_hw_set_mac_addr(nic, netdev); + } + + /* Init tasklet for handling Qset err interrupt */ + tasklet_init(&nic->qs_err_task, nicvf_handle_qs_err, + (unsigned long)nic); + + /* Init RBDR tasklet which will refill RBDR */ + tasklet_init(&nic->rbdr_task, nicvf_rbdr_task, + (unsigned long)nic); + INIT_DELAYED_WORK(&nic->rbdr_work, nicvf_rbdr_work); + + /* Configure CPI algorithm */ + nic->cpi_alg = cpi_alg; + nicvf_config_cpi(nic); + + /* Configure receive side scaling */ + nicvf_rss_init(nic); + + err = nicvf_register_interrupts(nic); + if (err) + goto cleanup; + + /* Initialize the queues */ + err = nicvf_init_resources(nic); + if (err) + goto cleanup; + + /* Make sure queue initialization is written */ + wmb(); + + nicvf_reg_write(nic, NIC_VF_INT, -1); + /* Enable Qset err interrupt */ + nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0); + + /* Enable completion queue interrupt */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx); + + /* Enable RBDR threshold interrupt */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx); + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + + return 0; +cleanup: + nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0); + nicvf_unregister_interrupts(nic); +napi_del: + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + cq_poll = nic->napi[qidx]; + if (!cq_poll) + continue; + napi_disable(&cq_poll->napi); + netif_napi_del(&cq_poll->napi); + kfree(cq_poll); + nic->napi[qidx] = NULL; + } + return err; +} + +static int nicvf_update_hw_max_frs(struct nicvf *nic, int mtu) +{ + union nic_mbx mbx = {}; + + mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS; + mbx.frs.max_frs = mtu; + mbx.frs.vf_id = nic->vf_id; + + return nicvf_send_msg_to_pf(nic, &mbx); +} + +static int nicvf_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct nicvf *nic = netdev_priv(netdev); + + if (new_mtu > NIC_HW_MAX_FRS) + return -EINVAL; + + if (new_mtu < NIC_HW_MIN_FRS) + return -EINVAL; + + if (nicvf_update_hw_max_frs(nic, new_mtu)) + return -EINVAL; + netdev->mtu = new_mtu; + nic->mtu = new_mtu; + + return 0; +} + +static int nicvf_set_mac_address(struct net_device *netdev, void *p) +{ + struct sockaddr *addr = p; + struct nicvf *nic = netdev_priv(netdev); + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + + if (nic->msix_enabled) + if (nicvf_hw_set_mac_addr(nic, netdev)) + return -EBUSY; + + return 0; +} + +static void nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx) +{ + if (bgx->rx) + nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats; + else + nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats; +} + +void nicvf_update_lmac_stats(struct nicvf *nic) +{ + int stat = 0; + union nic_mbx mbx = {}; + int timeout; + + if (!netif_running(nic->netdev)) + return; + + mbx.bgx_stats.msg =
NIC_MBOX_MSG_BGX_STATS; + mbx.bgx_stats.vf_id = nic->vf_id; + /* Rx stats */ + mbx.bgx_stats.rx = 1; + while (stat < BGX_RX_STATS_COUNT) { + nic->bgx_stats_acked = 0; + mbx.bgx_stats.idx = stat; + nicvf_send_msg_to_pf(nic, &mbx); + timeout = 0; + while ((!nic->bgx_stats_acked) && (timeout < 10)) { + msleep(2); + timeout++; + } + stat++; + } + + stat = 0; + + /* Tx stats */ + mbx.bgx_stats.rx = 0; + while (stat < BGX_TX_STATS_COUNT) { + nic->bgx_stats_acked = 0; + mbx.bgx_stats.idx = stat; + nicvf_send_msg_to_pf(nic, &mbx); + timeout = 0; + while ((!nic->bgx_stats_acked) && (timeout < 10)) { + msleep(2); + timeout++; + } + stat++; + } +} + +void nicvf_update_stats(struct nicvf *nic) +{ + int qidx; + struct nicvf_hw_stats *stats = &nic->stats; + struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + struct queue_set *qs = nic->qs; + +#define GET_RX_STATS(reg) \ + nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | (reg << 3)) +#define GET_TX_STATS(reg) \ + nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | (reg << 3)) + + stats->rx_bytes_ok = GET_RX_STATS(RX_OCTS); + stats->rx_ucast_frames_ok = GET_RX_STATS(RX_UCAST); + stats->rx_bcast_frames_ok = GET_RX_STATS(RX_BCAST); + stats->rx_mcast_frames_ok = GET_RX_STATS(RX_MCAST); + stats->rx_fcs_errors = GET_RX_STATS(RX_FCS); + stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR); + stats->rx_drop_red = GET_RX_STATS(RX_RED); + stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN); + stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST); + stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST); + stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST); + stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST); + + stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS); + stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST); + stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST); + stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST); + stats->tx_drops = GET_TX_STATS(TX_DROP); + + drv_stats->rx_frames_ok = stats->rx_ucast_frames_ok + + stats->rx_bcast_frames_ok + + stats->rx_mcast_frames_ok; + drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok + + stats->tx_bcast_frames_ok + + stats->tx_mcast_frames_ok; + drv_stats->rx_drops = stats->rx_drop_red + + stats->rx_drop_overrun; + drv_stats->tx_drops = stats->tx_drops; + + /* Update RQ and SQ stats */ + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_update_rq_stats(nic, qidx); + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_update_sq_stats(nic, qidx); +} + +struct rtnl_link_stats64 *nicvf_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct nicvf *nic = netdev_priv(netdev); + struct nicvf_hw_stats *hw_stats = &nic->stats; + struct nicvf_drv_stats *drv_stats = &nic->drv_stats; + + nicvf_update_stats(nic); + + stats->rx_bytes = hw_stats->rx_bytes_ok; + stats->rx_packets = drv_stats->rx_frames_ok; + stats->rx_dropped = drv_stats->rx_drops; + + stats->tx_bytes = hw_stats->tx_bytes_ok; + stats->tx_packets = drv_stats->tx_frames_ok; + stats->tx_dropped = drv_stats->tx_drops; + + return stats; +} + +static void nicvf_tx_timeout(struct net_device *dev) +{ + struct nicvf *nic = netdev_priv(dev); + + if (netif_msg_tx_err(nic)) + netdev_warn(dev, "%s: Transmit timed out, resetting\n", + dev->name); + + schedule_work(&nic->reset_task); +} + +static void nicvf_reset_task(struct work_struct *work) +{ + struct nicvf *nic; + + nic = container_of(work, struct nicvf, reset_task); + + if (!netif_running(nic->netdev)) + return; + + nicvf_stop(nic->netdev); + nicvf_open(nic->netdev); + nic->netdev->trans_start = jiffies; +} + +static const 
struct net_device_ops nicvf_netdev_ops = { + .ndo_open = nicvf_open, + .ndo_stop = nicvf_stop, + .ndo_start_xmit = nicvf_xmit, + .ndo_change_mtu = nicvf_change_mtu, + .ndo_set_mac_address = nicvf_set_mac_address, + .ndo_get_stats64 = nicvf_get_stats64, + .ndo_tx_timeout = nicvf_tx_timeout, +}; + +static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct device *dev = &pdev->dev; + struct net_device *netdev; + struct nicvf *nic; + struct queue_set *qs; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + err = pci_set_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "Unable to get usable DMA configuration\n"); + goto err_release_regions; + } + + err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(48)); + if (err) { + dev_err(dev, "unable to get 48-bit DMA for consistent allocations\n"); + goto err_release_regions; + } + + netdev = alloc_etherdev_mqs(sizeof(struct nicvf), + MAX_RCV_QUEUES_PER_QS, + MAX_SND_QUEUES_PER_QS); + if (!netdev) { + err = -ENOMEM; + goto err_release_regions; + } + + pci_set_drvdata(pdev, netdev); + + SET_NETDEV_DEV(netdev, &pdev->dev); + + nic = netdev_priv(netdev); + nic->netdev = netdev; + nic->pdev = pdev; + + /* MAP VF's configuration registers */ + nic->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!nic->reg_base) { + dev_err(dev, "Cannot map config register space, aborting\n"); + err = -ENOMEM; + goto err_free_netdev; + } + + err = nicvf_set_qset_resources(nic); + if (err) + goto err_free_netdev; + + qs = nic->qs; + + err = nicvf_set_real_num_queues(netdev, qs->sq_cnt, qs->rq_cnt); + if (err) + goto err_free_netdev; + + /* Check if PF is alive and get MAC address for this VF */ + err = nicvf_register_misc_interrupt(nic); + if (err) + goto err_free_netdev; + + netdev->features |= (NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG | + NETIF_F_TSO | NETIF_F_GRO); + netdev->hw_features = netdev->features; + + netdev->netdev_ops = &nicvf_netdev_ops; + + INIT_WORK(&nic->reset_task, nicvf_reset_task); + + err = register_netdev(netdev); + if (err) { + dev_err(dev, "Failed to register netdevice\n"); + goto err_unregister_interrupts; + } + + nic->msg_enable = debug; + + nicvf_set_ethtool_ops(netdev); + + return 0; + +err_unregister_interrupts: + nicvf_unregister_interrupts(nic); +err_free_netdev: + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + return err; +} + +static void nicvf_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct nicvf *nic = netdev_priv(netdev); + + unregister_netdev(netdev); + nicvf_unregister_interrupts(nic); + pci_set_drvdata(pdev, NULL); + free_netdev(netdev); + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver nicvf_driver = { + .name = DRV_NAME, + .id_table = nicvf_id_table, + .probe = nicvf_probe, + .remove = nicvf_remove, +}; + +static int __init nicvf_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&nicvf_driver); +} + +static void __exit nicvf_cleanup_module(void) +{ + pci_unregister_driver(&nicvf_driver); +} + +module_init(nicvf_init_module); +module_exit(nicvf_cleanup_module); diff --git 
a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c new file mode 100644 index 000000000000..196246665444 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -0,0 +1,1544 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <linux/etherdevice.h> +#include <net/ip.h> +#include <net/tso.h> + +#include "nic_reg.h" +#include "nic.h" +#include "q_struct.h" +#include "nicvf_queues.h" + +struct rbuf_info { + struct page *page; + void *data; + u64 offset; +}; + +#define GET_RBUF_INFO(x) ((struct rbuf_info *)(x - NICVF_RCV_BUF_ALIGN_BYTES)) + +/* Poll a register for a specific value */ +static int nicvf_poll_reg(struct nicvf *nic, int qidx, + u64 reg, int bit_pos, int bits, int val) +{ + u64 bit_mask; + u64 reg_val; + int timeout = 10; + + bit_mask = (1ULL << bits) - 1; + bit_mask = (bit_mask << bit_pos); + + while (timeout) { + reg_val = nicvf_queue_reg_read(nic, reg, qidx); + if (((reg_val & bit_mask) >> bit_pos) == val) + return 0; + usleep_range(1000, 2000); + timeout--; + } + netdev_err(nic->netdev, "Poll on reg 0x%llx failed\n", reg); + return 1; +} + +/* Allocate memory for a queue's descriptors */ +static int nicvf_alloc_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem, + int q_len, int desc_size, int align_bytes) +{ + dmem->q_len = q_len; + dmem->size = (desc_size * q_len) + align_bytes; + /* Save address, need it while freeing */ + dmem->unalign_base = dma_zalloc_coherent(&nic->pdev->dev, dmem->size, + &dmem->dma, GFP_KERNEL); + if (!dmem->unalign_base) + return -ENOMEM; + + /* Align memory address for 'align_bytes' */ + dmem->phys_base = NICVF_ALIGNED_ADDR((u64)dmem->dma, align_bytes); + dmem->base = (void *)((u8 *)dmem->unalign_base + + (dmem->phys_base - dmem->dma)); + return 0; +} + +/* Free queue's descriptor memory */ +static void nicvf_free_q_desc_mem(struct nicvf *nic, struct q_desc_mem *dmem) +{ + if (!dmem) + return; + + dma_free_coherent(&nic->pdev->dev, dmem->size, + dmem->unalign_base, dmem->dma); + dmem->unalign_base = NULL; + dmem->base = NULL; +} + +/* Allocate buffer for packet reception + * HW returns memory address where packet is DMA'ed but not a pointer + * into RBDR ring, so save buffer address at the start of fragment and + * align the start address to a cache aligned address + */ +static inline int nicvf_alloc_rcv_buffer(struct nicvf *nic, gfp_t gfp, + u32 buf_len, u64 **rbuf) +{ + u64 data; + struct rbuf_info *rinfo; + int order = get_order(buf_len); + + /* Check if request can be accommodated in previously allocated page */ + if (nic->rb_page) { + if ((nic->rb_page_offset + buf_len + buf_len) > + (PAGE_SIZE << order)) { + nic->rb_page = NULL; + } else { + nic->rb_page_offset += buf_len; + get_page(nic->rb_page); + } + } + + /* Allocate a new page */ + if (!nic->rb_page) { + nic->rb_page = alloc_pages(gfp | __GFP_COMP, order); + if (!nic->rb_page) { + netdev_err(nic->netdev, "Failed to allocate new rcv buffer\n"); + return -ENOMEM; + } + nic->rb_page_offset = 0; + } + + data = (u64)page_address(nic->rb_page) + nic->rb_page_offset; + + /* Align buffer addr to cache line i.e. 128 bytes */ + rinfo = (struct rbuf_info *)(data + NICVF_RCV_BUF_ALIGN_LEN(data)); + /* Save page address for later reference update */ +
rinfo->page = nic->rb_page; + /* Store start address for later retrieval */ + rinfo->data = (void *)data; + /* Store alignment offset */ + rinfo->offset = NICVF_RCV_BUF_ALIGN_LEN(data); + + data += rinfo->offset; + + /* Give next aligned address to hw for DMA */ + *rbuf = (u64 *)(data + NICVF_RCV_BUF_ALIGN_BYTES); + return 0; +} + +/* Retrieve actual buffer start address and build skb for received packet */ +static struct sk_buff *nicvf_rb_ptr_to_skb(struct nicvf *nic, + u64 rb_ptr, int len) +{ + struct sk_buff *skb; + struct rbuf_info *rinfo; + + rb_ptr = (u64)phys_to_virt(rb_ptr); + /* Get buffer start address and alignment offset */ + rinfo = GET_RBUF_INFO(rb_ptr); + + /* Now build an skb to give to stack */ + skb = build_skb(rinfo->data, RCV_FRAG_LEN); + if (!skb) { + put_page(rinfo->page); + return NULL; + } + + /* Set correct skb->data */ + skb_reserve(skb, rinfo->offset + NICVF_RCV_BUF_ALIGN_BYTES); + + prefetch((void *)rb_ptr); + return skb; +} + +/* Allocate RBDR ring and populate receive buffers */ +static int nicvf_init_rbdr(struct nicvf *nic, struct rbdr *rbdr, + int ring_len, int buf_size) +{ + int idx; + u64 *rbuf; + struct rbdr_entry_t *desc; + int err; + + err = nicvf_alloc_q_desc_mem(nic, &rbdr->dmem, ring_len, + sizeof(struct rbdr_entry_t), + NICVF_RCV_BUF_ALIGN_BYTES); + if (err) + return err; + + rbdr->desc = rbdr->dmem.base; + /* Buffer size has to be in multiples of 128 bytes */ + rbdr->dma_size = buf_size; + rbdr->enable = true; + rbdr->thresh = RBDR_THRESH; + + nic->rb_page = NULL; + for (idx = 0; idx < ring_len; idx++) { + err = nicvf_alloc_rcv_buffer(nic, GFP_KERNEL, RCV_FRAG_LEN, + &rbuf); + if (err) + return err; + + desc = GET_RBDR_DESC(rbdr, idx); + desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + } + return 0; +} + +/* Free RBDR ring and its receive buffers */ +static void nicvf_free_rbdr(struct nicvf *nic, struct rbdr *rbdr) +{ + int head, tail; + u64 buf_addr; + struct rbdr_entry_t *desc; + struct rbuf_info *rinfo; + + if (!rbdr) + return; + + rbdr->enable = false; + if (!rbdr->dmem.base) + return; + + head = rbdr->head; + tail = rbdr->tail; + + /* Free SKBs */ + while (head != tail) { + desc = GET_RBDR_DESC(rbdr, head); + buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; + rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); + put_page(rinfo->page); + head++; + head &= (rbdr->dmem.q_len - 1); + } + /* Free SKB of tail desc */ + desc = GET_RBDR_DESC(rbdr, tail); + buf_addr = desc->buf_addr << NICVF_RCV_BUF_ALIGN; + rinfo = GET_RBUF_INFO((u64)phys_to_virt(buf_addr)); + put_page(rinfo->page); + + /* Free RBDR ring */ + nicvf_free_q_desc_mem(nic, &rbdr->dmem); +} + +/* Refill receive buffer descriptors with new buffers. 
+ */ +void nicvf_refill_rbdr(struct nicvf *nic, gfp_t gfp) +{ + struct queue_set *qs = nic->qs; + int rbdr_idx = qs->rbdr_cnt; + int tail, qcount; + int refill_rb_cnt; + struct rbdr *rbdr; + struct rbdr_entry_t *desc; + u64 *rbuf; + int new_rb = 0; + +refill: + if (!rbdr_idx) + return; + rbdr_idx--; + rbdr = &qs->rbdr[rbdr_idx]; + /* Check if it's enabled */ + if (!rbdr->enable) + goto next_rbdr; + + /* Get no of desc's to be refilled */ + qcount = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, rbdr_idx); + qcount &= 0x7FFFF; + /* Doorbell can be ringed with a max of ring size minus 1 */ + if (qcount >= (qs->rbdr_len - 1)) + goto next_rbdr; + else + refill_rb_cnt = qs->rbdr_len - qcount - 1; + + /* Start filling descs from tail */ + tail = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_TAIL, rbdr_idx) >> 3; + while (refill_rb_cnt) { + tail++; + tail &= (rbdr->dmem.q_len - 1); + + if (nicvf_alloc_rcv_buffer(nic, gfp, RCV_FRAG_LEN, &rbuf)) + break; + + desc = GET_RBDR_DESC(rbdr, tail); + desc->buf_addr = virt_to_phys(rbuf) >> NICVF_RCV_BUF_ALIGN; + refill_rb_cnt--; + new_rb++; + } + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Check if buffer allocation failed */ + if (refill_rb_cnt) + nic->rb_alloc_fail = true; + else + nic->rb_alloc_fail = false; + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + rbdr_idx, new_rb); +next_rbdr: + /* Re-enable RBDR interrupts only if buffer allocation is success */ + if (!nic->rb_alloc_fail && rbdr->enable) + nicvf_enable_intr(nic, NICVF_INTR_RBDR, rbdr_idx); + + if (rbdr_idx) + goto refill; +} + +/* Alloc rcv buffers in non-atomic mode for better success */ +void nicvf_rbdr_work(struct work_struct *work) +{ + struct nicvf *nic = container_of(work, struct nicvf, rbdr_work.work); + + nicvf_refill_rbdr(nic, GFP_KERNEL); + if (nic->rb_alloc_fail) + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + else + nic->rb_work_scheduled = false; +} + +/* In Softirq context, alloc rcv buffers in atomic mode */ +void nicvf_rbdr_task(unsigned long data) +{ + struct nicvf *nic = (struct nicvf *)data; + + nicvf_refill_rbdr(nic, GFP_ATOMIC); + if (nic->rb_alloc_fail) { + nic->rb_work_scheduled = true; + schedule_delayed_work(&nic->rbdr_work, msecs_to_jiffies(10)); + } +} + +/* Initialize completion queue */ +static int nicvf_init_cmp_queue(struct nicvf *nic, + struct cmp_queue *cq, int q_len) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &cq->dmem, q_len, CMP_QUEUE_DESC_SIZE, + NICVF_CQ_BASE_ALIGN_BYTES); + if (err) + return err; + + cq->desc = cq->dmem.base; + cq->thresh = CMP_QUEUE_CQE_THRESH; + nic->cq_coalesce_usecs = (CMP_QUEUE_TIMER_THRESH * 0.05) - 1; + + return 0; +} + +static void nicvf_free_cmp_queue(struct nicvf *nic, struct cmp_queue *cq) +{ + if (!cq) + return; + if (!cq->dmem.base) + return; + + nicvf_free_q_desc_mem(nic, &cq->dmem); +} + +/* Initialize transmit queue */ +static int nicvf_init_snd_queue(struct nicvf *nic, + struct snd_queue *sq, int q_len) +{ + int err; + + err = nicvf_alloc_q_desc_mem(nic, &sq->dmem, q_len, SND_QUEUE_DESC_SIZE, + NICVF_SQ_BASE_ALIGN_BYTES); + if (err) + return err; + + sq->desc = sq->dmem.base; + sq->skbuff = kcalloc(q_len, sizeof(u64), GFP_ATOMIC); + sq->head = 0; + sq->tail = 0; + atomic_set(&sq->free_cnt, q_len - 1); + sq->thresh = SND_QUEUE_THRESH; + + /* Preallocate memory for TSO segment's header */ + sq->tso_hdrs = dma_alloc_coherent(&nic->pdev->dev, + q_len * TSO_HEADER_SIZE, + &sq->tso_hdrs_phys, GFP_KERNEL); + if (!sq->tso_hdrs) 
+ return -ENOMEM; + + return 0; +} + +static void nicvf_free_snd_queue(struct nicvf *nic, struct snd_queue *sq) +{ + if (!sq) + return; + if (!sq->dmem.base) + return; + + if (sq->tso_hdrs) + dma_free_coherent(&nic->pdev->dev, sq->dmem.q_len * TSO_HEADER_SIZE, + sq->tso_hdrs, sq->tso_hdrs_phys); + + kfree(sq->skbuff); + nicvf_free_q_desc_mem(nic, &sq->dmem); +} + +static void nicvf_reclaim_snd_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, 0); + /* Check if SQ is stopped */ + if (nicvf_poll_reg(nic, qidx, NIC_QSET_SQ_0_7_STATUS, 21, 1, 0x01)) + return; + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); +} + +static void nicvf_reclaim_rcv_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + union nic_mbx mbx = {}; + + /* Make sure all packets in the pipeline are written back into mem */ + mbx.msg.msg = NIC_MBOX_MSG_RQ_SW_SYNC; + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_reclaim_cmp_queue(struct nicvf *nic, + struct queue_set *qs, int qidx) +{ + /* Disable timer threshold (doesn't get reset upon CQ reset) */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, qidx, 0); + /* Disable completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, 0); + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); +} + +static void nicvf_reclaim_rbdr(struct nicvf *nic, + struct rbdr *rbdr, int qidx) +{ + u64 tmp, fifo_state; + int timeout = 10; + + /* Save head and tail pointers for freeing up buffers */ + rbdr->head = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_HEAD, + qidx) >> 3; + rbdr->tail = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_TAIL, + qidx) >> 3; + + /* If RBDR FIFO is in 'FAIL' state then do a reset first + * before reclaiming.
+ */ + fifo_state = nicvf_queue_reg_read(nic, NIC_QSET_RBDR_0_1_STATUS0, qidx); + if (((fifo_state >> 62) & 0x03) == 0x3) + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + /* Disable RBDR */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; + while (1) { + tmp = nicvf_queue_reg_read(nic, + NIC_QSET_RBDR_0_1_PREFETCH_STATUS, + qidx); + if ((tmp & 0xFFFFFFFF) == ((tmp >> 32) & 0xFFFFFFFF)) + break; + usleep_range(1000, 2000); + timeout--; + if (!timeout) { + netdev_err(nic->netdev, + "Failed polling on prefetch status\n"); + return; + } + } + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, NICVF_RBDR_RESET); + + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x02)) + return; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, qidx, 0x00); + if (nicvf_poll_reg(nic, qidx, NIC_QSET_RBDR_0_1_STATUS0, 62, 2, 0x00)) + return; +} + +/* Configures receive queue */ +static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct rcv_queue *rq; + struct rq_cfg rq_cfg; + + rq = &qs->rq[qidx]; + rq->enable = enable; + + /* Disable receive queue */ + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, 0); + + if (!rq->enable) { + nicvf_reclaim_rcv_queue(nic, qs, qidx); + return; + } + + rq->cq_qs = qs->vnic_id; + rq->cq_idx = qidx; + rq->start_rbdr_qs = qs->vnic_id; + rq->start_qs_rbdr_idx = qs->rbdr_cnt - 1; + rq->cont_rbdr_qs = qs->vnic_id; + rq->cont_qs_rbdr_idx = qs->rbdr_cnt - 1; + /* all writes of RBDR data to be loaded into L2 Cache as well*/ + rq->caching = 1; + + /* Send a mailbox msg to PF to config RQ */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_CFG; + mbx.rq.qs_num = qs->vnic_id; + mbx.rq.rq_num = qidx; + mbx.rq.cfg = (rq->caching << 26) | (rq->cq_qs << 19) | + (rq->cq_idx << 16) | (rq->cont_rbdr_qs << 9) | + (rq->cont_qs_rbdr_idx << 8) | + (rq->start_rbdr_qs << 1) | (rq->start_qs_rbdr_idx); + nicvf_send_msg_to_pf(nic, &mbx); + + mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG; + mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0); + nicvf_send_msg_to_pf(nic, &mbx); + + /* RQ drop config + * Enable CQ drop to reserve sufficient CQEs for all tx packets + */ + mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG; + mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8); + nicvf_send_msg_to_pf(nic, &mbx); + + nicvf_queue_reg_write(nic, NIC_QSET_RQ_GEN_CFG, qidx, 0x00); + + /* Enable Receive queue */ + rq_cfg.ena = 1; + rq_cfg.tcp_ena = 0; + nicvf_queue_reg_write(nic, NIC_QSET_RQ_0_7_CFG, qidx, *(u64 *)&rq_cfg); +} + +/* Configures completion queue */ +void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct cmp_queue *cq; + struct cq_cfg cq_cfg; + + cq = &qs->cq[qidx]; + cq->enable = enable; + + if (!cq->enable) { + nicvf_reclaim_cmp_queue(nic, qs, qidx); + return; + } + + /* Reset completion queue */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, NICVF_CQ_RESET); + + if (!cq->enable) + return; + + spin_lock_init(&cq->lock); + /* Set completion queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_BASE, + qidx, (u64)(cq->dmem.phys_base)); + + /* Enable Completion queue */ + cq_cfg.ena = 1; + cq_cfg.reset = 0; + cq_cfg.caching = 0; + cq_cfg.qsize = CMP_QSIZE; + cq_cfg.avg_con = 0; + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG, qidx, *(u64 *)&cq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, 
NIC_QSET_CQ_0_7_THRESH, qidx, cq->thresh); + nicvf_queue_reg_write(nic, NIC_QSET_CQ_0_7_CFG2, + qidx, nic->cq_coalesce_usecs); +} + +/* Configures transmit queue */ +static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + union nic_mbx mbx = {}; + struct snd_queue *sq; + struct sq_cfg sq_cfg; + + sq = &qs->sq[qidx]; + sq->enable = enable; + + if (!sq->enable) { + nicvf_reclaim_snd_queue(nic, qs, qidx); + return; + } + + /* Reset send queue */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, NICVF_SQ_RESET); + + sq->cq_qs = qs->vnic_id; + sq->cq_idx = qidx; + + /* Send a mailbox msg to PF to config SQ */ + mbx.sq.msg = NIC_MBOX_MSG_SQ_CFG; + mbx.sq.qs_num = qs->vnic_id; + mbx.sq.sq_num = qidx; + mbx.sq.cfg = (sq->cq_qs << 3) | sq->cq_idx; + nicvf_send_msg_to_pf(nic, &mbx); + + /* Set queue base address */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_BASE, + qidx, (u64)(sq->dmem.phys_base)); + + /* Enable send queue & set queue size */ + sq_cfg.ena = 1; + sq_cfg.reset = 0; + sq_cfg.ldwb = 0; + sq_cfg.qsize = SND_QSIZE; + sq_cfg.tstmp_bgx_intf = 0; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_THRESH, qidx, sq->thresh); + + /* Set queue:cpu affinity for better load distribution */ + if (cpu_online(qidx)) { + cpumask_set_cpu(qidx, &sq->affinity_mask); + netif_set_xps_queue(nic->netdev, + &sq->affinity_mask, qidx); + } +} + +/* Configures receive buffer descriptor ring */ +static void nicvf_rbdr_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable) +{ + struct rbdr *rbdr; + struct rbdr_cfg rbdr_cfg; + + rbdr = &qs->rbdr[qidx]; + nicvf_reclaim_rbdr(nic, rbdr, qidx); + if (!enable) + return; + + /* Set descriptor base address */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_BASE, + qidx, (u64)(rbdr->dmem.phys_base)); + + /* Enable RBDR & set queue size */ + /* Buffer size should be in multiples of 128 bytes */ + rbdr_cfg.ena = 1; + rbdr_cfg.reset = 0; + rbdr_cfg.ldwb = 0; + rbdr_cfg.qsize = RBDR_SIZE; + rbdr_cfg.avg_con = 0; + rbdr_cfg.lines = rbdr->dma_size / 128; + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_CFG, + qidx, *(u64 *)&rbdr_cfg); + + /* Notify HW */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_DOOR, + qidx, qs->rbdr_len - 1); + + /* Set threshold value for interrupt generation */ + nicvf_queue_reg_write(nic, NIC_QSET_RBDR_0_1_THRESH, + qidx, rbdr->thresh - 1); +} + +/* Requests PF to assign and enable Qset */ +void nicvf_qset_config(struct nicvf *nic, bool enable) +{ + union nic_mbx mbx = {}; + struct queue_set *qs = nic->qs; + struct qs_cfg *qs_cfg; + + if (!qs) { + netdev_warn(nic->netdev, + "Qset is still not allocated, don't init queues\n"); + return; + } + + qs->enable = enable; + qs->vnic_id = nic->vf_id; + + /* Send a mailbox msg to PF to config Qset */ + mbx.qs.msg = NIC_MBOX_MSG_QS_CFG; + mbx.qs.num = qs->vnic_id; + + mbx.qs.cfg = 0; + qs_cfg = (struct qs_cfg *)&mbx.qs.cfg; + if (qs->enable) { + qs_cfg->ena = 1; +#ifdef __BIG_ENDIAN + qs_cfg->be = 1; +#endif + qs_cfg->vnic = qs->vnic_id; + } + nicvf_send_msg_to_pf(nic, &mbx); +} + +static void nicvf_free_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Free receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_free_rbdr(nic, &qs->rbdr[qidx]); + + /* Free completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_free_cmp_queue(nic, 
&qs->cq[qidx]); + + /* Free send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_free_snd_queue(nic, &qs->sq[qidx]); +} + +static int nicvf_alloc_resources(struct nicvf *nic) +{ + int qidx; + struct queue_set *qs = nic->qs; + + /* Alloc receive buffer descriptor ring */ + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) { + if (nicvf_init_rbdr(nic, &qs->rbdr[qidx], qs->rbdr_len, + DMA_BUFFER_LEN)) + goto alloc_fail; + } + + /* Alloc send queue */ + for (qidx = 0; qidx < qs->sq_cnt; qidx++) { + if (nicvf_init_snd_queue(nic, &qs->sq[qidx], qs->sq_len)) + goto alloc_fail; + } + + /* Alloc completion queue */ + for (qidx = 0; qidx < qs->cq_cnt; qidx++) { + if (nicvf_init_cmp_queue(nic, &qs->cq[qidx], qs->cq_len)) + goto alloc_fail; + } + + return 0; +alloc_fail: + nicvf_free_resources(nic); + return -ENOMEM; +} + +int nicvf_set_qset_resources(struct nicvf *nic) +{ + struct queue_set *qs; + + qs = devm_kzalloc(&nic->pdev->dev, sizeof(*qs), GFP_KERNEL); + if (!qs) + return -ENOMEM; + nic->qs = qs; + + /* Set count of each queue */ + qs->rbdr_cnt = RBDR_CNT; + qs->rq_cnt = RCV_QUEUE_CNT; + qs->sq_cnt = SND_QUEUE_CNT; + qs->cq_cnt = CMP_QUEUE_CNT; + + /* Set queue lengths */ + qs->rbdr_len = RCV_BUF_COUNT; + qs->sq_len = SND_QUEUE_LEN; + qs->cq_len = CMP_QUEUE_LEN; + return 0; +} + +int nicvf_config_data_transfer(struct nicvf *nic, bool enable) +{ + bool disable = false; + struct queue_set *qs = nic->qs; + int qidx; + + if (!qs) + return 0; + + if (enable) { + if (nicvf_alloc_resources(nic)) + return -ENOMEM; + + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, enable); + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, enable); + } else { + for (qidx = 0; qidx < qs->rq_cnt; qidx++) + nicvf_rcv_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) + nicvf_rbdr_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->sq_cnt; qidx++) + nicvf_snd_queue_config(nic, qs, qidx, disable); + for (qidx = 0; qidx < qs->cq_cnt; qidx++) + nicvf_cmp_queue_config(nic, qs, qidx, disable); + + nicvf_free_resources(nic); + } + + return 0; +} + +/* Get a free desc from SQ + * returns descriptor ponter & descriptor number + */ +static inline int nicvf_get_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + int qentry; + + qentry = sq->tail; + atomic_sub(desc_cnt, &sq->free_cnt); + sq->tail += desc_cnt; + sq->tail &= (sq->dmem.q_len - 1); + + return qentry; +} + +/* Free descriptor back to SQ for future use */ +void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt) +{ + atomic_add(desc_cnt, &sq->free_cnt); + sq->head += desc_cnt; + sq->head &= (sq->dmem.q_len - 1); +} + +static inline int nicvf_get_nxt_sqentry(struct snd_queue *sq, int qentry) +{ + qentry++; + qentry &= (sq->dmem.q_len - 1); + return qentry; +} + +void nicvf_sq_enable(struct nicvf *nic, struct snd_queue *sq, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg |= NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); + /* Ring doorbell so that H/W restarts processing SQEs */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, qidx, 0); +} + +void nicvf_sq_disable(struct nicvf *nic, int qidx) +{ + u64 sq_cfg; + + sq_cfg = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_CFG, qidx); + sq_cfg &= 
~NICVF_SQ_EN; + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, sq_cfg); +} + +void nicvf_sq_free_used_descs(struct net_device *netdev, struct snd_queue *sq, + int qidx) +{ + u64 head, tail; + struct sk_buff *skb; + struct nicvf *nic = netdev_priv(netdev); + struct sq_hdr_subdesc *hdr; + + head = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_HEAD, qidx) >> 4; + tail = nicvf_queue_reg_read(nic, NIC_QSET_SQ_0_7_TAIL, qidx) >> 4; + while (sq->head != head) { + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, sq->head); + if (hdr->subdesc_type != SQ_DESC_TYPE_HEADER) { + nicvf_put_sq_desc(sq, 1); + continue; + } + skb = (struct sk_buff *)sq->skbuff[sq->head]; + atomic64_add(1, (atomic64_t *)&netdev->stats.tx_packets); + atomic64_add(hdr->tot_len, + (atomic64_t *)&netdev->stats.tx_bytes); + dev_kfree_skb_any(skb); + nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); + } +} + +/* Calculate no of SQ subdescriptors needed to transmit all + * segments of this TSO packet. + * Taken from 'Tilera network driver' with a minor modification. + */ +static int nicvf_tso_count_subdescs(struct sk_buff *skb) +{ + struct skb_shared_info *sh = skb_shinfo(skb); + unsigned int sh_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + unsigned int data_len = skb->len - sh_len; + unsigned int p_len = sh->gso_size; + long f_id = -1; /* id of the current fragment */ + long f_size = skb_headlen(skb) - sh_len; /* current fragment size */ + long f_used = 0; /* bytes used from the current fragment */ + long n; /* size of the current piece of payload */ + int num_edescs = 0; + int segment; + + for (segment = 0; segment < sh->gso_segs; segment++) { + unsigned int p_used = 0; + + /* One edesc for header and for each piece of the payload. */ + for (num_edescs++; p_used < p_len; num_edescs++) { + /* Advance as needed. */ + while (f_used >= f_size) { + f_id++; + f_size = skb_frag_size(&sh->frags[f_id]); + f_used = 0; + } + + /* Use bytes from the current fragment. */ + n = p_len - p_used; + if (n > f_size - f_used) + n = f_size - f_used; + f_used += n; + p_used += n; + } + + /* The last segment may be less than gso_size. */ + data_len -= p_len; + if (data_len < p_len) + p_len = data_len; + } + + /* '+ gso_segs' for SQ_HDR_SUDESCs for each segment */ + return num_edescs + sh->gso_segs; +} + +/* Get the number of SQ descriptors needed to xmit this skb */ +static int nicvf_sq_subdesc_required(struct nicvf *nic, struct sk_buff *skb) +{ + int subdesc_cnt = MIN_SQ_DESC_PER_PKT_XMIT; + + if (skb_shinfo(skb)->gso_size) { + subdesc_cnt = nicvf_tso_count_subdescs(skb); + return subdesc_cnt; + } + + if (skb_shinfo(skb)->nr_frags) + subdesc_cnt += skb_shinfo(skb)->nr_frags; + + return subdesc_cnt; +} + +/* Add SQ HEADER subdescriptor. + * First subdescriptor for every send descriptor. 
+ */ +static inline void +nicvf_sq_add_hdr_subdesc(struct snd_queue *sq, int qentry, + int subdesc_cnt, struct sk_buff *skb, int len) +{ + int proto; + struct sq_hdr_subdesc *hdr; + + hdr = (struct sq_hdr_subdesc *)GET_SQ_DESC(sq, qentry); + sq->skbuff[qentry] = (u64)skb; + + memset(hdr, 0, SND_QUEUE_DESC_SIZE); + hdr->subdesc_type = SQ_DESC_TYPE_HEADER; + /* Enable notification via CQE after processing SQE */ + hdr->post_cqe = 1; + /* No of subdescriptors following this */ + hdr->subdesc_cnt = subdesc_cnt; + hdr->tot_len = len; + + /* Offload checksum calculation to HW */ + if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->protocol != htons(ETH_P_IP)) + return; + + hdr->csum_l3 = 1; /* Enable IP csum calculation */ + hdr->l3_offset = skb_network_offset(skb); + hdr->l4_offset = skb_transport_offset(skb); + + proto = ip_hdr(skb)->protocol; + switch (proto) { + case IPPROTO_TCP: + hdr->csum_l4 = SEND_L4_CSUM_TCP; + break; + case IPPROTO_UDP: + hdr->csum_l4 = SEND_L4_CSUM_UDP; + break; + case IPPROTO_SCTP: + hdr->csum_l4 = SEND_L4_CSUM_SCTP; + break; + } + } +} + +/* SQ GATHER subdescriptor + * Must follow HDR descriptor + */ +static inline void nicvf_sq_add_gather_subdesc(struct snd_queue *sq, int qentry, + int size, u64 data) +{ + struct sq_gather_subdesc *gather; + + qentry &= (sq->dmem.q_len - 1); + gather = (struct sq_gather_subdesc *)GET_SQ_DESC(sq, qentry); + + memset(gather, 0, SND_QUEUE_DESC_SIZE); + gather->subdesc_type = SQ_DESC_TYPE_GATHER; + gather->ld_type = NIC_SEND_LD_TYPE_E_LDWB; + gather->size = size; + gather->addr = data; +} + +/* Segment a TSO packet into 'gso_size' segments and append + * them to SQ for transfer + */ +static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, + int qentry, struct sk_buff *skb) +{ + struct tso_t tso; + int seg_subdescs = 0, desc_cnt = 0; + int seg_len, total_len, data_left; + int hdr_qentry = qentry; + int hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tso_start(skb, &tso); + total_len = skb->len - hdr_len; + while (total_len > 0) { + char *hdr; + + /* Save Qentry for adding HDR_SUBDESC at the end */ + hdr_qentry = qentry; + + data_left = min_t(int, skb_shinfo(skb)->gso_size, total_len); + total_len -= data_left; + + /* Add segment's header */ + qentry = nicvf_get_nxt_sqentry(sq, qentry); + hdr = sq->tso_hdrs + qentry * TSO_HEADER_SIZE; + tso_build_hdr(skb, hdr, &tso, data_left, total_len == 0); + nicvf_sq_add_gather_subdesc(sq, qentry, hdr_len, + sq->tso_hdrs_phys + + qentry * TSO_HEADER_SIZE); + /* HDR_SUDESC + GATHER */ + seg_subdescs = 2; + seg_len = hdr_len; + + /* Add segment's payload fragments */ + while (data_left > 0) { + int size; + + size = min_t(int, tso.size, data_left); + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + nicvf_sq_add_gather_subdesc(sq, qentry, size, + virt_to_phys(tso.data)); + seg_subdescs++; + seg_len += size; + + data_left -= size; + tso_build_data(skb, &tso, size); + } + nicvf_sq_add_hdr_subdesc(sq, hdr_qentry, + seg_subdescs - 1, skb, seg_len); + sq->skbuff[hdr_qentry] = 0; + qentry = nicvf_get_nxt_sqentry(sq, qentry); + + desc_cnt += seg_subdescs; + } + /* Save SKB in the last segment for freeing */ + sq->skbuff[hdr_qentry] = (u64)skb; + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + skb_get_queue_mapping(skb), desc_cnt); + return 1; +} + +/* Append an skb to a SQ for packet transfer. 
*/ +int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb) +{ + int i, size; + int subdesc_cnt; + int sq_num, qentry; + struct queue_set *qs = nic->qs; + struct snd_queue *sq; + + sq_num = skb_get_queue_mapping(skb); + sq = &qs->sq[sq_num]; + + subdesc_cnt = nicvf_sq_subdesc_required(nic, skb); + if (subdesc_cnt > atomic_read(&sq->free_cnt)) + goto append_fail; + + qentry = nicvf_get_sq_desc(sq, subdesc_cnt); + + /* Check if its a TSO packet */ + if (skb_shinfo(skb)->gso_size) + return nicvf_sq_append_tso(nic, sq, qentry, skb); + + /* Add SQ header subdesc */ + nicvf_sq_add_hdr_subdesc(sq, qentry, subdesc_cnt - 1, skb, skb->len); + + /* Add SQ gather subdescs */ + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_is_nonlinear(skb) ? skb_headlen(skb) : skb->len; + nicvf_sq_add_gather_subdesc(sq, qentry, size, virt_to_phys(skb->data)); + + /* Check for scattered buffer */ + if (!skb_is_nonlinear(skb)) + goto doorbell; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + const struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + qentry = nicvf_get_nxt_sqentry(sq, qentry); + size = skb_frag_size(frag); + nicvf_sq_add_gather_subdesc(sq, qentry, size, + virt_to_phys( + skb_frag_address(frag))); + } + +doorbell: + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit new packet */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, subdesc_cnt); + return 1; + +append_fail: + netdev_dbg(nic->netdev, "Not enough SQ descriptors to xmit pkt\n"); + return 0; +} + +static inline unsigned frag_num(unsigned i) +{ +#ifdef __BIG_ENDIAN + return (i & ~3) + 3 - (i & 3); +#else + return i; +#endif +} + +/* Returns SKB for a received packet */ +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) +{ + int frag; + int payload_len = 0; + struct sk_buff *skb = NULL; + struct sk_buff *skb_frag = NULL; + struct sk_buff *prev_frag = NULL; + u16 *rb_lens = NULL; + u64 *rb_ptrs = NULL; + + rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); + rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + + netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", + __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); + + for (frag = 0; frag < cqe_rx->rb_cnt; frag++) { + payload_len = rb_lens[frag_num(frag)]; + if (!frag) { + /* First fragment */ + skb = nicvf_rb_ptr_to_skb(nic, + *rb_ptrs - cqe_rx->align_pad, + payload_len); + if (!skb) + return NULL; + skb_reserve(skb, cqe_rx->align_pad); + skb_put(skb, payload_len); + } else { + /* Add fragments */ + skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs, + payload_len); + if (!skb_frag) { + dev_kfree_skb(skb); + return NULL; + } + + if (!skb_shinfo(skb)->frag_list) + skb_shinfo(skb)->frag_list = skb_frag; + else + prev_frag->next = skb_frag; + + prev_frag = skb_frag; + skb->len += payload_len; + skb->data_len += payload_len; + skb_frag->len = payload_len; + } + /* Next buffer pointer */ + rb_ptrs++; + } + return skb; +} + +/* Enable interrupt */ +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val; + + reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case 
NICVF_INTR_TCP_TIMER: + reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to enable interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_ENA_W1S, reg_val); +} + +/* Disable interrupt */ +void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val = 0; + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val |= ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val |= (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val |= (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val |= (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to disable interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_ENA_W1C, reg_val); +} + +/* Clear interrupt */ +void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val = 0; + + switch (int_type) { + case NICVF_INTR_CQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + reg_val = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + reg_val = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + reg_val = (1ULL << NICVF_INTR_PKT_DROP_SHIFT); + break; + case NICVF_INTR_TCP_TIMER: + reg_val = (1ULL << NICVF_INTR_TCP_TIMER_SHIFT); + break; + case NICVF_INTR_MBOX: + reg_val = (1ULL << NICVF_INTR_MBOX_SHIFT); + break; + case NICVF_INTR_QS_ERR: + reg_val |= (1ULL << NICVF_INTR_QS_ERR_SHIFT); + break; + default: + netdev_err(nic->netdev, + "Failed to clear interrupt: unknown type\n"); + break; + } + + nicvf_reg_write(nic, NIC_VF_INT, reg_val); +} + +/* Check if interrupt is enabled */ +int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx) +{ + u64 reg_val; + u64 mask = 0xff; + + reg_val = nicvf_reg_read(nic, NIC_VF_ENA_W1S); + + switch (int_type) { + case NICVF_INTR_CQ: + mask = ((1ULL << q_idx) << NICVF_INTR_CQ_SHIFT); + break; + case NICVF_INTR_SQ: + mask = ((1ULL << q_idx) << NICVF_INTR_SQ_SHIFT); + break; + case NICVF_INTR_RBDR: + mask = ((1ULL << q_idx) << NICVF_INTR_RBDR_SHIFT); + break; + case NICVF_INTR_PKT_DROP: + mask = NICVF_INTR_PKT_DROP_MASK; + break; + case NICVF_INTR_TCP_TIMER: + mask = NICVF_INTR_TCP_TIMER_MASK; + break; + case NICVF_INTR_MBOX: + mask = NICVF_INTR_MBOX_MASK; + break; + case NICVF_INTR_QS_ERR: + mask = NICVF_INTR_QS_ERR_MASK; + break; + default: + netdev_err(nic->netdev, + "Failed to check interrupt enable: unknown type\n"); + break; + } + + return (reg_val & mask); +} + +void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx) +{ + struct rcv_queue *rq; + +#define GET_RQ_STATS(reg) \ + nicvf_reg_read(nic, NIC_QSET_RQ_0_7_STAT_0_1 |\ + (rq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + rq = &nic->qs->rq[rq_idx]; + rq->stats.bytes = GET_RQ_STATS(RQ_SQ_STATS_OCTS); + rq->stats.pkts = GET_RQ_STATS(RQ_SQ_STATS_PKTS); +} + +void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx) +{ + struct snd_queue *sq; + +#define GET_SQ_STATS(reg) \ + nicvf_reg_read(nic, 
NIC_QSET_SQ_0_7_STAT_0_1 |\ + (sq_idx << NIC_Q_NUM_SHIFT) | (reg << 3)) + + sq = &nic->qs->sq[sq_idx]; + sq->stats.bytes = GET_SQ_STATS(RQ_SQ_STATS_OCTS); + sq->stats.pkts = GET_SQ_STATS(RQ_SQ_STATS_PKTS); +} + +/* Check for errors in the receive cmp.queue entry */ +int nicvf_check_cqe_rx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_rx_t *cqe_rx) +{ + struct cmp_queue_stats *stats = &cq->stats; + + if (!cqe_rx->err_level && !cqe_rx->err_opcode) { + stats->rx.errop.good++; + return 0; + } + + if (netif_msg_rx_err(nic)) + netdev_err(nic->netdev, + "%s: RX error CQE err_level 0x%x err_opcode 0x%x\n", + nic->netdev->name, + cqe_rx->err_level, cqe_rx->err_opcode); + + switch (cqe_rx->err_level) { + case CQ_ERRLVL_MAC: + stats->rx.errlvl.mac_errs++; + break; + case CQ_ERRLVL_L2: + stats->rx.errlvl.l2_errs++; + break; + case CQ_ERRLVL_L3: + stats->rx.errlvl.l3_errs++; + break; + case CQ_ERRLVL_L4: + stats->rx.errlvl.l4_errs++; + break; + } + + switch (cqe_rx->err_opcode) { + case CQ_RX_ERROP_RE_PARTIAL: + stats->rx.errop.partial_pkts++; + break; + case CQ_RX_ERROP_RE_JABBER: + stats->rx.errop.jabber_errs++; + break; + case CQ_RX_ERROP_RE_FCS: + stats->rx.errop.fcs_errs++; + break; + case CQ_RX_ERROP_RE_TERMINATE: + stats->rx.errop.terminate_errs++; + break; + case CQ_RX_ERROP_RE_RX_CTL: + stats->rx.errop.bgx_rx_errs++; + break; + case CQ_RX_ERROP_PREL2_ERR: + stats->rx.errop.prel2_errs++; + break; + case CQ_RX_ERROP_L2_FRAGMENT: + stats->rx.errop.l2_frags++; + break; + case CQ_RX_ERROP_L2_OVERRUN: + stats->rx.errop.l2_overruns++; + break; + case CQ_RX_ERROP_L2_PFCS: + stats->rx.errop.l2_pfcs++; + break; + case CQ_RX_ERROP_L2_PUNY: + stats->rx.errop.l2_puny++; + break; + case CQ_RX_ERROP_L2_MAL: + stats->rx.errop.l2_hdr_malformed++; + break; + case CQ_RX_ERROP_L2_OVERSIZE: + stats->rx.errop.l2_oversize++; + break; + case CQ_RX_ERROP_L2_UNDERSIZE: + stats->rx.errop.l2_undersize++; + break; + case CQ_RX_ERROP_L2_LENMISM: + stats->rx.errop.l2_len_mismatch++; + break; + case CQ_RX_ERROP_L2_PCLP: + stats->rx.errop.l2_pclp++; + break; + case CQ_RX_ERROP_IP_NOT: + stats->rx.errop.non_ip++; + break; + case CQ_RX_ERROP_IP_CSUM_ERR: + stats->rx.errop.ip_csum_err++; + break; + case CQ_RX_ERROP_IP_MAL: + stats->rx.errop.ip_hdr_malformed++; + break; + case CQ_RX_ERROP_IP_MALD: + stats->rx.errop.ip_payload_malformed++; + break; + case CQ_RX_ERROP_IP_HOP: + stats->rx.errop.ip_hop_errs++; + break; + case CQ_RX_ERROP_L3_ICRC: + stats->rx.errop.l3_icrc_errs++; + break; + case CQ_RX_ERROP_L3_PCLP: + stats->rx.errop.l3_pclp++; + break; + case CQ_RX_ERROP_L4_MAL: + stats->rx.errop.l4_malformed++; + break; + case CQ_RX_ERROP_L4_CHK: + stats->rx.errop.l4_csum_errs++; + break; + case CQ_RX_ERROP_UDP_LEN: + stats->rx.errop.udp_len_err++; + break; + case CQ_RX_ERROP_L4_PORT: + stats->rx.errop.bad_l4_port++; + break; + case CQ_RX_ERROP_TCP_FLAG: + stats->rx.errop.bad_tcp_flag++; + break; + case CQ_RX_ERROP_TCP_OFFSET: + stats->rx.errop.tcp_offset_errs++; + break; + case CQ_RX_ERROP_L4_PCLP: + stats->rx.errop.l4_pclp++; + break; + case CQ_RX_ERROP_RBDR_TRUNC: + stats->rx.errop.pkt_truncated++; + break; + } + + return 1; +} + +/* Check for errors in the send cmp.queue entry */ +int nicvf_check_cqe_tx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_send_t *cqe_tx) +{ + struct cmp_queue_stats *stats = &cq->stats; + + switch (cqe_tx->send_status) { + case CQ_TX_ERROP_GOOD: + stats->tx.good++; + return 0; + case CQ_TX_ERROP_DESC_FAULT: + stats->tx.desc_fault++; + break; + case CQ_TX_ERROP_HDR_CONS_ERR: + 
stats->tx.hdr_cons_err++; + break; + case CQ_TX_ERROP_SUBDC_ERR: + stats->tx.subdesc_err++; + break; + case CQ_TX_ERROP_IMM_SIZE_OFLOW: + stats->tx.imm_size_oflow++; + break; + case CQ_TX_ERROP_DATA_SEQUENCE_ERR: + stats->tx.data_seq_err++; + break; + case CQ_TX_ERROP_MEM_SEQUENCE_ERR: + stats->tx.mem_seq_err++; + break; + case CQ_TX_ERROP_LOCK_VIOL: + stats->tx.lock_viol++; + break; + case CQ_TX_ERROP_DATA_FAULT: + stats->tx.data_fault++; + break; + case CQ_TX_ERROP_TSTMP_CONFLICT: + stats->tx.tstmp_conflict++; + break; + case CQ_TX_ERROP_TSTMP_TIMEOUT: + stats->tx.tstmp_timeout++; + break; + case CQ_TX_ERROP_MEM_FAULT: + stats->tx.mem_fault++; + break; + case CQ_TX_ERROP_CK_OVERLAP: + stats->tx.csum_overlap++; + break; + case CQ_TX_ERROP_CK_OFLOW: + stats->tx.csum_overflow++; + break; + } + + return 1; +} diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h new file mode 100644 index 000000000000..8341bdf755d1 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -0,0 +1,381 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef NICVF_QUEUES_H +#define NICVF_QUEUES_H + +#include <linux/netdevice.h> +#include "q_struct.h" + +#define MAX_QUEUE_SET 128 +#define MAX_RCV_QUEUES_PER_QS 8 +#define MAX_RCV_BUF_DESC_RINGS_PER_QS 2 +#define MAX_SND_QUEUES_PER_QS 8 +#define MAX_CMP_QUEUES_PER_QS 8 + +/* VF's queue interrupt ranges */ +#define NICVF_INTR_ID_CQ 0 +#define NICVF_INTR_ID_SQ 8 +#define NICVF_INTR_ID_RBDR 16 +#define NICVF_INTR_ID_MISC 18 +#define NICVF_INTR_ID_QS_ERR 19 + +#define for_each_cq_irq(irq) \ + for (irq = NICVF_INTR_ID_CQ; irq < NICVF_INTR_ID_SQ; irq++) +#define for_each_sq_irq(irq) \ + for (irq = NICVF_INTR_ID_SQ; irq < NICVF_INTR_ID_RBDR; irq++) +#define for_each_rbdr_irq(irq) \ + for (irq = NICVF_INTR_ID_RBDR; irq < NICVF_INTR_ID_MISC; irq++) + +#define RBDR_SIZE0 0ULL /* 8K entries */ +#define RBDR_SIZE1 1ULL /* 16K entries */ +#define RBDR_SIZE2 2ULL /* 32K entries */ +#define RBDR_SIZE3 3ULL /* 64K entries */ +#define RBDR_SIZE4 4ULL /* 126K entries */ +#define RBDR_SIZE5 5ULL /* 256K entries */ +#define RBDR_SIZE6 6ULL /* 512K entries */ + +#define SND_QUEUE_SIZE0 0ULL /* 1K entries */ +#define SND_QUEUE_SIZE1 1ULL /* 2K entries */ +#define SND_QUEUE_SIZE2 2ULL /* 4K entries */ +#define SND_QUEUE_SIZE3 3ULL /* 8K entries */ +#define SND_QUEUE_SIZE4 4ULL /* 16K entries */ +#define SND_QUEUE_SIZE5 5ULL /* 32K entries */ +#define SND_QUEUE_SIZE6 6ULL /* 64K entries */ + +#define CMP_QUEUE_SIZE0 0ULL /* 1K entries */ +#define CMP_QUEUE_SIZE1 1ULL /* 2K entries */ +#define CMP_QUEUE_SIZE2 2ULL /* 4K entries */ +#define CMP_QUEUE_SIZE3 3ULL /* 8K entries */ +#define CMP_QUEUE_SIZE4 4ULL /* 16K entries */ +#define CMP_QUEUE_SIZE5 5ULL /* 32K entries */ +#define CMP_QUEUE_SIZE6 6ULL /* 64K entries */ + +/* Default queue count per QS, its lengths and threshold values */ +#define RBDR_CNT 1 +#define RCV_QUEUE_CNT 8 +#define SND_QUEUE_CNT 8 +#define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ + +#define SND_QSIZE SND_QUEUE_SIZE4 +#define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) +#define MAX_SND_QUEUE_LEN (1ULL << (SND_QUEUE_SIZE6 + 10)) +#define SND_QUEUE_THRESH 2ULL +#define MIN_SQ_DESC_PER_PKT_XMIT 2 +/* Since timestamp not enabled, otherwise 2 */ +#define MAX_CQE_PER_PKT_XMIT 1 + +#define 
CMP_QSIZE CMP_QUEUE_SIZE4 +#define CMP_QUEUE_LEN (1ULL << (CMP_QSIZE + 10)) +#define CMP_QUEUE_CQE_THRESH 0 +#define CMP_QUEUE_TIMER_THRESH 220 /* 10usec */ + +#define RBDR_SIZE RBDR_SIZE0 +#define RCV_BUF_COUNT (1ULL << (RBDR_SIZE + 13)) +#define MAX_RCV_BUF_COUNT (1ULL << (RBDR_SIZE6 + 13)) +#define RBDR_THRESH (RCV_BUF_COUNT / 2) +#define DMA_BUFFER_LEN 2048 /* In multiples of 128bytes */ +#define RCV_FRAG_LEN (SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + \ + (NICVF_RCV_BUF_ALIGN_BYTES * 2)) +#define RCV_DATA_OFFSET NICVF_RCV_BUF_ALIGN_BYTES + +#define MAX_CQES_FOR_TX ((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \ + MAX_CQE_PER_PKT_XMIT) +#define RQ_CQ_DROP ((CMP_QUEUE_LEN - MAX_CQES_FOR_TX) / 256) + +/* Descriptor size in bytes */ +#define SND_QUEUE_DESC_SIZE 16 +#define CMP_QUEUE_DESC_SIZE 512 + +/* Buffer / descriptor alignments */ +#define NICVF_RCV_BUF_ALIGN 7 +#define NICVF_RCV_BUF_ALIGN_BYTES (1ULL << NICVF_RCV_BUF_ALIGN) +#define NICVF_CQ_BASE_ALIGN_BYTES 512 /* 9 bits */ +#define NICVF_SQ_BASE_ALIGN_BYTES 128 /* 7 bits */ + +#define NICVF_ALIGNED_ADDR(ADDR, ALIGN_BYTES) ALIGN(ADDR, ALIGN_BYTES) +#define NICVF_ADDR_ALIGN_LEN(ADDR, BYTES)\ + (NICVF_ALIGNED_ADDR(ADDR, BYTES) - BYTES) +#define NICVF_RCV_BUF_ALIGN_LEN(X)\ + (NICVF_ALIGNED_ADDR(X, NICVF_RCV_BUF_ALIGN_BYTES) - X) + +/* Queue enable/disable */ +#define NICVF_SQ_EN BIT_ULL(19) + +/* Queue reset */ +#define NICVF_CQ_RESET BIT_ULL(41) +#define NICVF_SQ_RESET BIT_ULL(17) +#define NICVF_RBDR_RESET BIT_ULL(43) + +enum CQ_RX_ERRLVL_E { + CQ_ERRLVL_MAC, + CQ_ERRLVL_L2, + CQ_ERRLVL_L3, + CQ_ERRLVL_L4, +}; + +enum CQ_RX_ERROP_E { + CQ_RX_ERROP_RE_NONE = 0x0, + CQ_RX_ERROP_RE_PARTIAL = 0x1, + CQ_RX_ERROP_RE_JABBER = 0x2, + CQ_RX_ERROP_RE_FCS = 0x7, + CQ_RX_ERROP_RE_TERMINATE = 0x9, + CQ_RX_ERROP_RE_RX_CTL = 0xb, + CQ_RX_ERROP_PREL2_ERR = 0x1f, + CQ_RX_ERROP_L2_FRAGMENT = 0x20, + CQ_RX_ERROP_L2_OVERRUN = 0x21, + CQ_RX_ERROP_L2_PFCS = 0x22, + CQ_RX_ERROP_L2_PUNY = 0x23, + CQ_RX_ERROP_L2_MAL = 0x24, + CQ_RX_ERROP_L2_OVERSIZE = 0x25, + CQ_RX_ERROP_L2_UNDERSIZE = 0x26, + CQ_RX_ERROP_L2_LENMISM = 0x27, + CQ_RX_ERROP_L2_PCLP = 0x28, + CQ_RX_ERROP_IP_NOT = 0x41, + CQ_RX_ERROP_IP_CSUM_ERR = 0x42, + CQ_RX_ERROP_IP_MAL = 0x43, + CQ_RX_ERROP_IP_MALD = 0x44, + CQ_RX_ERROP_IP_HOP = 0x45, + CQ_RX_ERROP_L3_ICRC = 0x46, + CQ_RX_ERROP_L3_PCLP = 0x47, + CQ_RX_ERROP_L4_MAL = 0x61, + CQ_RX_ERROP_L4_CHK = 0x62, + CQ_RX_ERROP_UDP_LEN = 0x63, + CQ_RX_ERROP_L4_PORT = 0x64, + CQ_RX_ERROP_TCP_FLAG = 0x65, + CQ_RX_ERROP_TCP_OFFSET = 0x66, + CQ_RX_ERROP_L4_PCLP = 0x67, + CQ_RX_ERROP_RBDR_TRUNC = 0x70, +}; + +enum CQ_TX_ERROP_E { + CQ_TX_ERROP_GOOD = 0x0, + CQ_TX_ERROP_DESC_FAULT = 0x10, + CQ_TX_ERROP_HDR_CONS_ERR = 0x11, + CQ_TX_ERROP_SUBDC_ERR = 0x12, + CQ_TX_ERROP_IMM_SIZE_OFLOW = 0x80, + CQ_TX_ERROP_DATA_SEQUENCE_ERR = 0x81, + CQ_TX_ERROP_MEM_SEQUENCE_ERR = 0x82, + CQ_TX_ERROP_LOCK_VIOL = 0x83, + CQ_TX_ERROP_DATA_FAULT = 0x84, + CQ_TX_ERROP_TSTMP_CONFLICT = 0x85, + CQ_TX_ERROP_TSTMP_TIMEOUT = 0x86, + CQ_TX_ERROP_MEM_FAULT = 0x87, + CQ_TX_ERROP_CK_OVERLAP = 0x88, + CQ_TX_ERROP_CK_OFLOW = 0x89, + CQ_TX_ERROP_ENUM_LAST = 0x8a, +}; + +struct cmp_queue_stats { + struct rx_stats { + struct { + u64 mac_errs; + u64 l2_errs; + u64 l3_errs; + u64 l4_errs; + } errlvl; + struct { + u64 good; + u64 partial_pkts; + u64 jabber_errs; + u64 fcs_errs; + u64 terminate_errs; + u64 bgx_rx_errs; + u64 prel2_errs; + u64 l2_frags; + u64 l2_overruns; + u64 l2_pfcs; + u64 l2_puny; + u64 l2_hdr_malformed; + u64 l2_oversize; + 
u64 l2_undersize; + u64 l2_len_mismatch; + u64 l2_pclp; + u64 non_ip; + u64 ip_csum_err; + u64 ip_hdr_malformed; + u64 ip_payload_malformed; + u64 ip_hop_errs; + u64 l3_icrc_errs; + u64 l3_pclp; + u64 l4_malformed; + u64 l4_csum_errs; + u64 udp_len_err; + u64 bad_l4_port; + u64 bad_tcp_flag; + u64 tcp_offset_errs; + u64 l4_pclp; + u64 pkt_truncated; + } errop; + } rx; + struct tx_stats { + u64 good; + u64 desc_fault; + u64 hdr_cons_err; + u64 subdesc_err; + u64 imm_size_oflow; + u64 data_seq_err; + u64 mem_seq_err; + u64 lock_viol; + u64 data_fault; + u64 tstmp_conflict; + u64 tstmp_timeout; + u64 mem_fault; + u64 csum_overlap; + u64 csum_overflow; + } tx; +} ____cacheline_aligned_in_smp; + +enum RQ_SQ_STATS { + RQ_SQ_STATS_OCTS, + RQ_SQ_STATS_PKTS, +}; + +struct rx_tx_queue_stats { + u64 bytes; + u64 pkts; +} ____cacheline_aligned_in_smp; + +struct q_desc_mem { + dma_addr_t dma; + u64 size; + u16 q_len; + dma_addr_t phys_base; + void *base; + void *unalign_base; +}; + +struct rbdr { + bool enable; + u32 dma_size; + u32 frag_len; + u32 thresh; /* Threshold level for interrupt */ + void *desc; + u32 head; + u32 tail; + struct q_desc_mem dmem; +} ____cacheline_aligned_in_smp; + +struct rcv_queue { + bool enable; + struct rbdr *rbdr_start; + struct rbdr *rbdr_cont; + bool en_tcp_reassembly; + u8 cq_qs; /* CQ's QS to which this RQ is assigned */ + u8 cq_idx; /* CQ index (0 to 7) in the QS */ + u8 cont_rbdr_qs; /* Continue buffer ptrs - QS num */ + u8 cont_qs_rbdr_idx; /* RBDR idx in the cont QS */ + u8 start_rbdr_qs; /* First buffer ptrs - QS num */ + u8 start_qs_rbdr_idx; /* RBDR idx in the above QS */ + u8 caching; + struct rx_tx_queue_stats stats; +} ____cacheline_aligned_in_smp; + +struct cmp_queue { + bool enable; + u16 thresh; + spinlock_t lock; /* lock to serialize processing CQEs */ + void *desc; + struct q_desc_mem dmem; + struct cmp_queue_stats stats; +} ____cacheline_aligned_in_smp; + +struct snd_queue { + bool enable; + u8 cq_qs; /* CQ's QS to which this SQ is pointing */ + u8 cq_idx; /* CQ index (0 to 7) in the above QS */ + u16 thresh; + atomic_t free_cnt; + u32 head; + u32 tail; + u64 *skbuff; + void *desc; + +#define TSO_HEADER_SIZE 128 + /* For TSO segment's header */ + char *tso_hdrs; + dma_addr_t tso_hdrs_phys; + + cpumask_t affinity_mask; + struct q_desc_mem dmem; + struct rx_tx_queue_stats stats; +} ____cacheline_aligned_in_smp; + +struct queue_set { + bool enable; + bool be_en; + u8 vnic_id; + u8 rq_cnt; + u8 cq_cnt; + u64 cq_len; + u8 sq_cnt; + u64 sq_len; + u8 rbdr_cnt; + u64 rbdr_len; + struct rcv_queue rq[MAX_RCV_QUEUES_PER_QS]; + struct cmp_queue cq[MAX_CMP_QUEUES_PER_QS]; + struct snd_queue sq[MAX_SND_QUEUES_PER_QS]; + struct rbdr rbdr[MAX_RCV_BUF_DESC_RINGS_PER_QS]; +} ____cacheline_aligned_in_smp; + +#define GET_RBDR_DESC(RING, idx)\ + (&(((struct rbdr_entry_t *)((RING)->desc))[idx])) +#define GET_SQ_DESC(RING, idx)\ + (&(((struct sq_hdr_subdesc *)((RING)->desc))[idx])) +#define GET_CQ_DESC(RING, idx)\ + (&(((union cq_desc_t *)((RING)->desc))[idx])) + +/* CQ status bits */ +#define CQ_WR_FULL BIT(26) +#define CQ_WR_DISABLE BIT(25) +#define CQ_WR_FAULT BIT(24) +#define CQ_CQE_COUNT (0xFFFF << 0) + +#define CQ_ERR_MASK (CQ_WR_FULL | CQ_WR_DISABLE | CQ_WR_FAULT) + +int nicvf_set_qset_resources(struct nicvf *nic); +int nicvf_config_data_transfer(struct nicvf *nic, bool enable); +void nicvf_qset_config(struct nicvf *nic, bool enable); +void nicvf_cmp_queue_config(struct nicvf *nic, struct queue_set *qs, + int qidx, bool enable); + +void nicvf_sq_enable(struct nicvf 
*nic, struct snd_queue *sq, int qidx); +void nicvf_sq_disable(struct nicvf *nic, int qidx); +void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt); +void nicvf_sq_free_used_descs(struct net_device *netdev, + struct snd_queue *sq, int qidx); +int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb); + +struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx); +void nicvf_rbdr_task(unsigned long data); +void nicvf_rbdr_work(struct work_struct *work); + +void nicvf_enable_intr(struct nicvf *nic, int int_type, int q_idx); +void nicvf_disable_intr(struct nicvf *nic, int int_type, int q_idx); +void nicvf_clear_intr(struct nicvf *nic, int int_type, int q_idx); +int nicvf_is_intr_enabled(struct nicvf *nic, int int_type, int q_idx); + +/* Register access APIs */ +void nicvf_reg_write(struct nicvf *nic, u64 offset, u64 val); +u64 nicvf_reg_read(struct nicvf *nic, u64 offset); +void nicvf_qset_reg_write(struct nicvf *nic, u64 offset, u64 val); +u64 nicvf_qset_reg_read(struct nicvf *nic, u64 offset); +void nicvf_queue_reg_write(struct nicvf *nic, u64 offset, + u64 qidx, u64 val); +u64 nicvf_queue_reg_read(struct nicvf *nic, + u64 offset, u64 qidx); + +/* Stats */ +void nicvf_update_rq_stats(struct nicvf *nic, int rq_idx); +void nicvf_update_sq_stats(struct nicvf *nic, int sq_idx); +int nicvf_check_cqe_rx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_rx_t *cqe_rx); +int nicvf_check_cqe_tx_errs(struct nicvf *nic, + struct cmp_queue *cq, struct cqe_send_t *cqe_tx); +#endif /* NICVF_QUEUES_H */ diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h new file mode 100644 index 000000000000..3c1de97b1add --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/q_struct.h @@ -0,0 +1,701 @@ +/* + * This file contains HW queue descriptor formats, config register + * structures etc + * + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#ifndef Q_STRUCT_H +#define Q_STRUCT_H + +/* Load transaction types for reading segment bytes specified by + * NIC_SEND_GATHER_S[LD_TYPE]. 
+ */ +enum nic_send_ld_type_e { + NIC_SEND_LD_TYPE_E_LDD = 0x0, + NIC_SEND_LD_TYPE_E_LDT = 0x1, + NIC_SEND_LD_TYPE_E_LDWB = 0x2, + NIC_SEND_LD_TYPE_E_ENUM_LAST = 0x3, +}; + +enum ether_type_algorithm { + ETYPE_ALG_NONE = 0x0, + ETYPE_ALG_SKIP = 0x1, + ETYPE_ALG_ENDPARSE = 0x2, + ETYPE_ALG_VLAN = 0x3, + ETYPE_ALG_VLAN_STRIP = 0x4, +}; + +enum layer3_type { + L3TYPE_NONE = 0x00, + L3TYPE_GRH = 0x01, + L3TYPE_IPV4 = 0x04, + L3TYPE_IPV4_OPTIONS = 0x05, + L3TYPE_IPV6 = 0x06, + L3TYPE_IPV6_OPTIONS = 0x07, + L3TYPE_ET_STOP = 0x0D, + L3TYPE_OTHER = 0x0E, +}; + +enum layer4_type { + L4TYPE_NONE = 0x00, + L4TYPE_IPSEC_ESP = 0x01, + L4TYPE_IPFRAG = 0x02, + L4TYPE_IPCOMP = 0x03, + L4TYPE_TCP = 0x04, + L4TYPE_UDP = 0x05, + L4TYPE_SCTP = 0x06, + L4TYPE_GRE = 0x07, + L4TYPE_ROCE_BTH = 0x08, + L4TYPE_OTHER = 0x0E, +}; + +/* CPI and RSSI configuration */ +enum cpi_algorithm_type { + CPI_ALG_NONE = 0x0, + CPI_ALG_VLAN = 0x1, + CPI_ALG_VLAN16 = 0x2, + CPI_ALG_DIFF = 0x3, +}; + +enum rss_algorithm_type { + RSS_ALG_NONE = 0x00, + RSS_ALG_PORT = 0x01, + RSS_ALG_IP = 0x02, + RSS_ALG_TCP_IP = 0x03, + RSS_ALG_UDP_IP = 0x04, + RSS_ALG_SCTP_IP = 0x05, + RSS_ALG_GRE_IP = 0x06, + RSS_ALG_ROCE = 0x07, +}; + +enum rss_hash_cfg { + RSS_HASH_L2ETC = 0x00, + RSS_HASH_IP = 0x01, + RSS_HASH_TCP = 0x02, + RSS_HASH_TCP_SYN_DIS = 0x03, + RSS_HASH_UDP = 0x04, + RSS_HASH_L4ETC = 0x05, + RSS_HASH_ROCE = 0x06, + RSS_L3_BIDI = 0x07, + RSS_L4_BIDI = 0x08, +}; + +/* Completion queue entry types */ +enum cqe_type { + CQE_TYPE_INVALID = 0x0, + CQE_TYPE_RX = 0x2, + CQE_TYPE_RX_SPLIT = 0x3, + CQE_TYPE_RX_TCP = 0x4, + CQE_TYPE_SEND = 0x8, + CQE_TYPE_SEND_PTP = 0x9, +}; + +enum cqe_rx_tcp_status { + CQE_RX_STATUS_VALID_TCP_CNXT = 0x00, + CQE_RX_STATUS_INVALID_TCP_CNXT = 0x0F, +}; + +enum cqe_send_status { + CQE_SEND_STATUS_GOOD = 0x00, + CQE_SEND_STATUS_DESC_FAULT = 0x01, + CQE_SEND_STATUS_HDR_CONS_ERR = 0x11, + CQE_SEND_STATUS_SUBDESC_ERR = 0x12, + CQE_SEND_STATUS_IMM_SIZE_OFLOW = 0x80, + CQE_SEND_STATUS_CRC_SEQ_ERR = 0x81, + CQE_SEND_STATUS_DATA_SEQ_ERR = 0x82, + CQE_SEND_STATUS_MEM_SEQ_ERR = 0x83, + CQE_SEND_STATUS_LOCK_VIOL = 0x84, + CQE_SEND_STATUS_LOCK_UFLOW = 0x85, + CQE_SEND_STATUS_DATA_FAULT = 0x86, + CQE_SEND_STATUS_TSTMP_CONFLICT = 0x87, + CQE_SEND_STATUS_TSTMP_TIMEOUT = 0x88, + CQE_SEND_STATUS_MEM_FAULT = 0x89, + CQE_SEND_STATUS_CSUM_OVERLAP = 0x8A, + CQE_SEND_STATUS_CSUM_OVERFLOW = 0x8B, +}; + +enum cqe_rx_tcp_end_reason { + CQE_RX_TCP_END_FIN_FLAG_DET = 0, + CQE_RX_TCP_END_INVALID_FLAG = 1, + CQE_RX_TCP_END_TIMEOUT = 2, + CQE_RX_TCP_END_OUT_OF_SEQ = 3, + CQE_RX_TCP_END_PKT_ERR = 4, + CQE_RX_TCP_END_QS_DISABLED = 0x0F, +}; + +/* Packet protocol level error enumeration */ +enum cqe_rx_err_level { + CQE_RX_ERRLVL_RE = 0x0, + CQE_RX_ERRLVL_L2 = 0x1, + CQE_RX_ERRLVL_L3 = 0x2, + CQE_RX_ERRLVL_L4 = 0x3, +}; + +/* Packet protocol level error type enumeration */ +enum cqe_rx_err_opcode { + CQE_RX_ERR_RE_NONE = 0x0, + CQE_RX_ERR_RE_PARTIAL = 0x1, + CQE_RX_ERR_RE_JABBER = 0x2, + CQE_RX_ERR_RE_FCS = 0x7, + CQE_RX_ERR_RE_TERMINATE = 0x9, + CQE_RX_ERR_RE_RX_CTL = 0xb, + CQE_RX_ERR_PREL2_ERR = 0x1f, + CQE_RX_ERR_L2_FRAGMENT = 0x20, + CQE_RX_ERR_L2_OVERRUN = 0x21, + CQE_RX_ERR_L2_PFCS = 0x22, + CQE_RX_ERR_L2_PUNY = 0x23, + CQE_RX_ERR_L2_MAL = 0x24, + CQE_RX_ERR_L2_OVERSIZE = 0x25, + CQE_RX_ERR_L2_UNDERSIZE = 0x26, + CQE_RX_ERR_L2_LENMISM = 0x27, + CQE_RX_ERR_L2_PCLP = 0x28, + CQE_RX_ERR_IP_NOT = 0x41, + CQE_RX_ERR_IP_CHK = 0x42, + CQE_RX_ERR_IP_MAL = 0x43, + CQE_RX_ERR_IP_MALD = 0x44, + CQE_RX_ERR_IP_HOP = 0x45, + CQE_RX_ERR_L3_ICRC = 0x46, + 
CQE_RX_ERR_L3_PCLP = 0x47, + CQE_RX_ERR_L4_MAL = 0x61, + CQE_RX_ERR_L4_CHK = 0x62, + CQE_RX_ERR_UDP_LEN = 0x63, + CQE_RX_ERR_L4_PORT = 0x64, + CQE_RX_ERR_TCP_FLAG = 0x65, + CQE_RX_ERR_TCP_OFFSET = 0x66, + CQE_RX_ERR_L4_PCLP = 0x67, + CQE_RX_ERR_RBDR_TRUNC = 0x70, +}; + +struct cqe_rx_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 stdn_fault:1; + u64 rsvd0:1; + u64 rq_qs:7; + u64 rq_idx:3; + u64 rsvd1:12; + u64 rss_alg:4; + u64 rsvd2:4; + u64 rb_cnt:4; + u64 vlan_found:1; + u64 vlan_stripped:1; + u64 vlan2_found:1; + u64 vlan2_stripped:1; + u64 l4_type:4; + u64 l3_type:4; + u64 l2_present:1; + u64 err_level:3; + u64 err_opcode:8; + + u64 pkt_len:16; /* W1 */ + u64 l2_ptr:8; + u64 l3_ptr:8; + u64 l4_ptr:8; + u64 cq_pkt_len:8; + u64 align_pad:3; + u64 rsvd3:1; + u64 chan:12; + + u64 rss_tag:32; /* W2 */ + u64 vlan_tci:16; + u64 vlan_ptr:8; + u64 vlan2_ptr:8; + + u64 rb3_sz:16; /* W3 */ + u64 rb2_sz:16; + u64 rb1_sz:16; + u64 rb0_sz:16; + + u64 rb7_sz:16; /* W4 */ + u64 rb6_sz:16; + u64 rb5_sz:16; + u64 rb4_sz:16; + + u64 rb11_sz:16; /* W5 */ + u64 rb10_sz:16; + u64 rb9_sz:16; + u64 rb8_sz:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 err_opcode:8; + u64 err_level:3; + u64 l2_present:1; + u64 l3_type:4; + u64 l4_type:4; + u64 vlan2_stripped:1; + u64 vlan2_found:1; + u64 vlan_stripped:1; + u64 vlan_found:1; + u64 rb_cnt:4; + u64 rsvd2:4; + u64 rss_alg:4; + u64 rsvd1:12; + u64 rq_idx:3; + u64 rq_qs:7; + u64 rsvd0:1; + u64 stdn_fault:1; + u64 cqe_type:4; /* W0 */ + u64 chan:12; + u64 rsvd3:1; + u64 align_pad:3; + u64 cq_pkt_len:8; + u64 l4_ptr:8; + u64 l3_ptr:8; + u64 l2_ptr:8; + u64 pkt_len:16; /* W1 */ + u64 vlan2_ptr:8; + u64 vlan_ptr:8; + u64 vlan_tci:16; + u64 rss_tag:32; /* W2 */ + u64 rb0_sz:16; + u64 rb1_sz:16; + u64 rb2_sz:16; + u64 rb3_sz:16; /* W3 */ + u64 rb4_sz:16; + u64 rb5_sz:16; + u64 rb6_sz:16; + u64 rb7_sz:16; /* W4 */ + u64 rb8_sz:16; + u64 rb9_sz:16; + u64 rb10_sz:16; + u64 rb11_sz:16; /* W5 */ +#endif + u64 rb0_ptr:64; + u64 rb1_ptr:64; + u64 rb2_ptr:64; + u64 rb3_ptr:64; + u64 rb4_ptr:64; + u64 rb5_ptr:64; + u64 rb6_ptr:64; + u64 rb7_ptr:64; + u64 rb8_ptr:64; + u64 rb9_ptr:64; + u64 rb10_ptr:64; + u64 rb11_ptr:64; +}; + +struct cqe_rx_tcp_err_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:60; + + u64 rsvd1:4; /* W1 */ + u64 partial_first:1; + u64 rsvd2:27; + u64 rbdr_bytes:8; + u64 rsvd3:24; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 rsvd0:60; + u64 cqe_type:4; + + u64 rsvd3:24; + u64 rbdr_bytes:8; + u64 rsvd2:27; + u64 partial_first:1; + u64 rsvd1:4; +#endif +}; + +struct cqe_rx_tcp_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:52; + u64 cq_tcp_status:8; + + u64 rsvd1:32; /* W1 */ + u64 tcp_cntx_bytes:8; + u64 rsvd2:8; + u64 tcp_err_bytes:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 cq_tcp_status:8; + u64 rsvd0:52; + u64 cqe_type:4; /* W0 */ + + u64 tcp_err_bytes:16; + u64 rsvd2:8; + u64 tcp_cntx_bytes:8; + u64 rsvd1:32; /* W1 */ +#endif +}; + +struct cqe_send_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 cqe_type:4; /* W0 */ + u64 rsvd0:4; + u64 sqe_ptr:16; + u64 rsvd1:4; + u64 rsvd2:10; + u64 sq_qs:7; + u64 sq_idx:3; + u64 rsvd3:8; + u64 send_status:8; + + u64 ptp_timestamp:64; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 send_status:8; + u64 rsvd3:8; + u64 sq_idx:3; + u64 sq_qs:7; + u64 rsvd2:10; + u64 rsvd1:4; + u64 sqe_ptr:16; + u64 rsvd0:4; + u64 cqe_type:4; /* W0 */ + + u64 ptp_timestamp:64; /* W1 */ +#endif +}; + +union cq_desc_t { + u64 u[64]; + 
struct cqe_send_t snd_hdr; + struct cqe_rx_t rx_hdr; + struct cqe_rx_tcp_t rx_tcp_hdr; + struct cqe_rx_tcp_err_t rx_tcp_err_hdr; +}; + +struct rbdr_entry_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd0:15; + u64 buf_addr:42; + u64 cache_align:7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 cache_align:7; + u64 buf_addr:42; + u64 rsvd0:15; +#endif +}; + +/* TCP reassembly context */ +struct rbe_tcp_cnxt_t { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 tcp_pkt_cnt:12; + u64 rsvd1:4; + u64 align_hdr_bytes:4; + u64 align_ptr_bytes:4; + u64 ptr_bytes:16; + u64 rsvd2:24; + u64 cqe_type:4; + u64 rsvd0:54; + u64 tcp_end_reason:2; + u64 tcp_status:4; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tcp_status:4; + u64 tcp_end_reason:2; + u64 rsvd0:54; + u64 cqe_type:4; + u64 rsvd2:24; + u64 ptr_bytes:16; + u64 align_ptr_bytes:4; + u64 align_hdr_bytes:4; + u64 rsvd1:4; + u64 tcp_pkt_cnt:12; +#endif +}; + +/* Always Big endian */ +struct rx_hdr_t { + u64 opaque:32; + u64 rss_flow:8; + u64 skip_length:6; + u64 disable_rss:1; + u64 disable_tcp_reassembly:1; + u64 nodrop:1; + u64 dest_alg:2; + u64 rsvd0:2; + u64 dest_rq:11; +}; + +enum send_l4_csum_type { + SEND_L4_CSUM_DISABLE = 0x00, + SEND_L4_CSUM_UDP = 0x01, + SEND_L4_CSUM_TCP = 0x02, + SEND_L4_CSUM_SCTP = 0x03, +}; + +enum send_crc_alg { + SEND_CRCALG_CRC32 = 0x00, + SEND_CRCALG_CRC32C = 0x01, + SEND_CRCALG_ICRC = 0x02, +}; + +enum send_load_type { + SEND_LD_TYPE_LDD = 0x00, + SEND_LD_TYPE_LDT = 0x01, + SEND_LD_TYPE_LDWB = 0x02, +}; + +enum send_mem_alg_type { + SEND_MEMALG_SET = 0x00, + SEND_MEMALG_ADD = 0x08, + SEND_MEMALG_SUB = 0x09, + SEND_MEMALG_ADDLEN = 0x0A, + SEND_MEMALG_SUBLEN = 0x0B, +}; + +enum send_mem_dsz_type { + SEND_MEMDSZ_B64 = 0x00, + SEND_MEMDSZ_B32 = 0x01, + SEND_MEMDSZ_B8 = 0x03, +}; + +enum sq_subdesc_type { + SQ_DESC_TYPE_INVALID = 0x00, + SQ_DESC_TYPE_HEADER = 0x01, + SQ_DESC_TYPE_CRC = 0x02, + SQ_DESC_TYPE_IMMEDIATE = 0x03, + SQ_DESC_TYPE_GATHER = 0x04, + SQ_DESC_TYPE_MEMORY = 0x05, +}; + +struct sq_crc_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 rsvd1:32; + u64 crc_ival:32; + u64 subdesc_type:4; + u64 crc_alg:2; + u64 rsvd0:10; + u64 crc_insert_pos:16; + u64 hdr_start:16; + u64 crc_len:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 crc_len:16; + u64 hdr_start:16; + u64 crc_insert_pos:16; + u64 rsvd0:10; + u64 crc_alg:2; + u64 subdesc_type:4; + u64 crc_ival:32; + u64 rsvd1:32; +#endif +}; + +struct sq_gather_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 ld_type:2; + u64 rsvd0:42; + u64 size:16; + + u64 rsvd1:15; /* W1 */ + u64 addr:49; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 size:16; + u64 rsvd0:42; + u64 ld_type:2; + u64 subdesc_type:4; /* W0 */ + + u64 addr:49; + u64 rsvd1:15; /* W1 */ +#endif +}; + +/* SQ immediate subdescriptor */ +struct sq_imm_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 rsvd0:46; + u64 len:14; + + u64 data:64; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 len:14; + u64 rsvd0:46; + u64 subdesc_type:4; /* W0 */ + + u64 data:64; /* W1 */ +#endif +}; + +struct sq_mem_subdesc { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; /* W0 */ + u64 mem_alg:4; + u64 mem_dsz:2; + u64 wmem:1; + u64 rsvd0:21; + u64 offset:32; + + u64 rsvd1:15; /* W1 */ + u64 addr:49; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 offset:32; + u64 rsvd0:21; + u64 wmem:1; + u64 mem_dsz:2; + u64 mem_alg:4; + u64 subdesc_type:4; /* W0 */ + + u64 addr:49; + u64 rsvd1:15; /* W1 */ +#endif +}; + +struct sq_hdr_subdesc { +#if 
defined(__BIG_ENDIAN_BITFIELD) + u64 subdesc_type:4; + u64 tso:1; + u64 post_cqe:1; /* Post CQE on no error also */ + u64 dont_send:1; + u64 tstmp:1; + u64 subdesc_cnt:8; + u64 csum_l4:2; + u64 csum_l3:1; + u64 rsvd0:5; + u64 l4_offset:8; + u64 l3_offset:8; + u64 rsvd1:4; + u64 tot_len:20; /* W0 */ + + u64 tso_sdc_cont:8; + u64 tso_sdc_first:8; + u64 tso_l4_offset:8; + u64 tso_flags_last:12; + u64 tso_flags_first:12; + u64 rsvd2:2; + u64 tso_max_paysize:14; /* W1 */ +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tot_len:20; + u64 rsvd1:4; + u64 l3_offset:8; + u64 l4_offset:8; + u64 rsvd0:5; + u64 csum_l3:1; + u64 csum_l4:2; + u64 subdesc_cnt:8; + u64 tstmp:1; + u64 dont_send:1; + u64 post_cqe:1; /* Post CQE on no error also */ + u64 tso:1; + u64 subdesc_type:4; /* W0 */ + + u64 tso_max_paysize:14; + u64 rsvd2:2; + u64 tso_flags_first:12; + u64 tso_flags_last:12; + u64 tso_l4_offset:8; + u64 tso_sdc_first:8; + u64 tso_sdc_cont:8; /* W1 */ +#endif +}; + +/* Queue config register formats */ +struct rq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_2_63:62; + u64 ena:1; + u64 tcp_ena:1; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tcp_ena:1; + u64 ena:1; + u64 reserved_2_63:62; +#endif +}; + +struct cq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_43_63:21; + u64 ena:1; + u64 reset:1; + u64 caching:1; + u64 reserved_35_39:5; + u64 qsize:3; + u64 reserved_25_31:7; + u64 avg_con:9; + u64 reserved_0_15:16; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 reserved_0_15:16; + u64 avg_con:9; + u64 reserved_25_31:7; + u64 qsize:3; + u64 reserved_35_39:5; + u64 caching:1; + u64 reset:1; + u64 ena:1; + u64 reserved_43_63:21; +#endif +}; + +struct sq_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_20_63:44; + u64 ena:1; + u64 reserved_18_18:1; + u64 reset:1; + u64 ldwb:1; + u64 reserved_11_15:5; + u64 qsize:3; + u64 reserved_3_7:5; + u64 tstmp_bgx_intf:3; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 tstmp_bgx_intf:3; + u64 reserved_3_7:5; + u64 qsize:3; + u64 reserved_11_15:5; + u64 ldwb:1; + u64 reset:1; + u64 reserved_18_18:1; + u64 ena:1; + u64 reserved_20_63:44; +#endif +}; + +struct rbdr_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_45_63:19; + u64 ena:1; + u64 reset:1; + u64 ldwb:1; + u64 reserved_36_41:6; + u64 qsize:4; + u64 reserved_25_31:7; + u64 avg_con:9; + u64 reserved_12_15:4; + u64 lines:12; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 lines:12; + u64 reserved_12_15:4; + u64 avg_con:9; + u64 reserved_25_31:7; + u64 qsize:4; + u64 reserved_36_41:6; + u64 ldwb:1; + u64 reset:1; + u64 ena: 1; + u64 reserved_45_63:19; +#endif +}; + +struct qs_cfg { +#if defined(__BIG_ENDIAN_BITFIELD) + u64 reserved_32_63:32; + u64 ena:1; + u64 reserved_27_30:4; + u64 sq_ins_ena:1; + u64 sq_ins_pos:6; + u64 lock_ena:1; + u64 lock_viol_cqe_ena:1; + u64 send_tstmp_ena:1; + u64 be:1; + u64 reserved_7_15:9; + u64 vnic:7; +#elif defined(__LITTLE_ENDIAN_BITFIELD) + u64 vnic:7; + u64 reserved_7_15:9; + u64 be:1; + u64 send_tstmp_ena:1; + u64 lock_viol_cqe_ena:1; + u64 lock_ena:1; + u64 sq_ins_pos:6; + u64 sq_ins_ena:1; + u64 reserved_27_30:4; + u64 ena:1; + u64 reserved_32_63:32; +#endif +}; + +#endif /* Q_STRUCT_H */ diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c new file mode 100644 index 000000000000..020e11cf3fdd --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -0,0 +1,966 @@ +/* + * Copyright (C) 2015 Cavium, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/phy.h> +#include <linux/of.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> + +#include "nic_reg.h" +#include "nic.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-BGX" +#define DRV_VERSION "1.0" + +struct lmac { + struct bgx *bgx; + int dmac; + unsigned char mac[ETH_ALEN]; + bool link_up; + int lmacid; /* ID within BGX */ + int lmacid_bd; /* ID on board */ + struct net_device netdev; + struct phy_device *phydev; + unsigned int last_duplex; + unsigned int last_link; + unsigned int last_speed; + bool is_sgmii; + struct delayed_work dwork; + struct workqueue_struct *check_link; +} lmac; + +struct bgx { + u8 bgx_id; + u8 qlm_mode; + struct lmac lmac[MAX_LMAC_PER_BGX]; + int lmac_count; + int lmac_type; + int lane_to_sds; + int use_training; + void __iomem *reg_base; + struct pci_dev *pdev; +} bgx; + +struct bgx *bgx_vnic[MAX_BGX_THUNDER]; +static int lmac_count; /* Total no of LMACs in system */ + +static int bgx_xaui_check_link(struct lmac *lmac); + +/* Supported devices */ +static const struct pci_device_id bgx_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Cavium Inc"); +MODULE_DESCRIPTION("Cavium Thunder BGX/MAC Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, bgx_id_table); + +/* The Cavium ThunderX network controller can *only* be found in SoCs + * containing the ThunderX ARM64 CPU implementation. All accesses to the device + * registers on this platform are implicitly strongly ordered with respect + * to memory accesses. So writeq_relaxed() and readq_relaxed() are safe to use + * with no memory barriers in this driver. The readq()/writeq() functions add + * explicit ordering operation which in this case are redundant, and only + * add overhead. 
+ */ + +/* Register read/write APIs */ +static u64 bgx_reg_read(struct bgx *bgx, u8 lmac, u64 offset) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + return readq_relaxed(addr); +} + +static void bgx_reg_write(struct bgx *bgx, u8 lmac, u64 offset, u64 val) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + writeq_relaxed(val, addr); +} + +static void bgx_reg_modify(struct bgx *bgx, u8 lmac, u64 offset, u64 val) +{ + void __iomem *addr = bgx->reg_base + ((u32)lmac << 20) + offset; + + writeq_relaxed(val | readq_relaxed(addr), addr); +} + +static int bgx_poll_reg(struct bgx *bgx, u8 lmac, u64 reg, u64 mask, bool zero) +{ + int timeout = 100; + u64 reg_val; + + while (timeout) { + reg_val = bgx_reg_read(bgx, lmac, reg); + if (zero && !(reg_val & mask)) + return 0; + if (!zero && (reg_val & mask)) + return 0; + usleep_range(1000, 2000); + timeout--; + } + return 1; +} + +/* Return number of BGX present in HW */ +unsigned bgx_get_map(int node) +{ + int i; + unsigned map = 0; + + for (i = 0; i < MAX_BGX_PER_CN88XX; i++) { + if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i]) + map |= (1 << i); + } + + return map; +} +EXPORT_SYMBOL(bgx_get_map); + +/* Return number of LMAC configured for this BGX */ +int bgx_get_lmac_count(int node, int bgx_idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (bgx) + return bgx->lmac_count; + + return 0; +} +EXPORT_SYMBOL(bgx_get_lmac_count); + +/* Returns the current link status of LMAC */ +void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) +{ + struct bgx_link_status *link = (struct bgx_link_status *)status; + struct bgx *bgx; + struct lmac *lmac; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return; + + lmac = &bgx->lmac[lmacid]; + link->link_up = lmac->link_up; + link->duplex = lmac->last_duplex; + link->speed = lmac->last_speed; +} +EXPORT_SYMBOL(bgx_get_lmac_link_state); + +const char *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + + if (bgx) + return bgx->lmac[lmacid].mac; + + return NULL; +} +EXPORT_SYMBOL(bgx_get_lmac_mac); + +void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const char *mac) +{ + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + + if (!bgx) + return; + + ether_addr_copy(bgx->lmac[lmacid].mac, mac); +} +EXPORT_SYMBOL(bgx_set_lmac_mac); + +static void bgx_sgmii_change_link_state(struct lmac *lmac) +{ + struct bgx *bgx = lmac->bgx; + u64 cmr_cfg; + u64 port_cfg = 0; + u64 misc_ctl = 0; + + cmr_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG); + cmr_cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); + + port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); + misc_ctl = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL); + + if (lmac->link_up) { + misc_ctl &= ~PCS_MISC_CTL_GMX_ENO; + port_cfg &= ~GMI_PORT_CFG_DUPLEX; + port_cfg |= (lmac->last_duplex << 2); + } else { + misc_ctl |= PCS_MISC_CTL_GMX_ENO; + } + + switch (lmac->last_speed) { + case 10: + port_cfg &= ~GMI_PORT_CFG_SPEED; /* speed 0 */ + port_cfg |= GMI_PORT_CFG_SPEED_MSB; /* speed_msb 1 */ + port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 50; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0); + break; + case 100: + port_cfg &= ~GMI_PORT_CFG_SPEED; /* 
speed 0 */ + port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */ + port_cfg &= ~GMI_PORT_CFG_SLOT_TIME; /* slottime 0 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 5; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 64); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_BURST, 0); + break; + case 1000: + port_cfg |= GMI_PORT_CFG_SPEED; /* speed 1 */ + port_cfg &= ~GMI_PORT_CFG_SPEED_MSB; /* speed_msb 0 */ + port_cfg |= GMI_PORT_CFG_SLOT_TIME; /* slottime 1 */ + misc_ctl &= ~PCS_MISC_CTL_SAMP_PT_MASK; + misc_ctl |= 1; /* samp_pt */ + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_TXX_SLOT, 512); + if (lmac->last_duplex) + bgx_reg_write(bgx, lmac->lmacid, + BGX_GMP_GMI_TXX_BURST, 0); + else + bgx_reg_write(bgx, lmac->lmacid, + BGX_GMP_GMI_TXX_BURST, 8192); + break; + default: + break; + } + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_PCS_MISCX_CTL, misc_ctl); + bgx_reg_write(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG, port_cfg); + + port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); + + /* renable lmac */ + cmr_cfg |= CMR_EN; + bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); +} + +void bgx_lmac_handler(struct net_device *netdev) +{ + struct lmac *lmac = container_of(netdev, struct lmac, netdev); + struct phy_device *phydev = lmac->phydev; + int link_changed = 0; + + if (!lmac) + return; + + if (!phydev->link && lmac->last_link) + link_changed = -1; + + if (phydev->link && + (lmac->last_duplex != phydev->duplex || + lmac->last_link != phydev->link || + lmac->last_speed != phydev->speed)) { + link_changed = 1; + } + + lmac->last_link = phydev->link; + lmac->last_speed = phydev->speed; + lmac->last_duplex = phydev->duplex; + + if (!link_changed) + return; + + if (link_changed > 0) + lmac->link_up = true; + else + lmac->link_up = false; + + if (lmac->is_sgmii) + bgx_sgmii_change_link_state(lmac); + else + bgx_xaui_check_link(lmac); +} + +u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return 0; + + if (idx > 8) + lmac = 0; + return bgx_reg_read(bgx, lmac, BGX_CMRX_RX_STAT0 + (idx * 8)); +} +EXPORT_SYMBOL(bgx_get_rx_stats); + +u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) +{ + struct bgx *bgx; + + bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + if (!bgx) + return 0; + + return bgx_reg_read(bgx, lmac, BGX_CMRX_TX_STAT0 + (idx * 8)); +} +EXPORT_SYMBOL(bgx_get_tx_stats); + +static void bgx_flush_dmac_addrs(struct bgx *bgx, int lmac) +{ + u64 offset; + + while (bgx->lmac[lmac].dmac > 0) { + offset = ((bgx->lmac[lmac].dmac - 1) * sizeof(u64)) + + (lmac * MAX_DMAC_PER_LMAC * sizeof(u64)); + bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + offset, 0); + bgx->lmac[lmac].dmac--; + } +} + +static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) +{ + u64 cfg; + + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30); + /* max packet size */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_RXX_JABBER, MAX_FRAME_SIZE); + + /* Disable frame alignment if using preamble */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND); + if (cfg & 1) + bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_SGMII_CTL, 0); + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); + + /* PCS reset */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_RESET); + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, + PCS_MRX_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX PCS reset not completed\n"); + return -1; + } + 
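+	/* Note on the poll helper used above and below: bgx_poll_reg()
+	 * retries for roughly 100-200 ms (100 iterations of
+	 * usleep_range(1000, 2000)). Its 'zero' argument selects whether
+	 * it waits for the masked bits to clear or to become set, and a
+	 * nonzero return means the condition was not met in that window.
+	 */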
+ /* power down, reset autoneg, autoneg enable */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MRX_CTL); + cfg &= ~PCS_MRX_CTL_PWR_DN; + cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN); + bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg); + + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, + PCS_MRX_STATUS_AN_CPT, false)) { + dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); + return -1; + } + + return 0; +} + +static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) +{ + u64 cfg; + + /* Reset SPU */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET); + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n"); + return -1; + } + + /* Disable LMAC */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); + /* Set interleaved running disparity for RXAUI */ + if (bgx->lmac_type != BGX_MODE_RXAUI) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); + else + bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, + SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP); + + /* clear all interrupts */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT); + bgx_reg_write(bgx, lmacid, BGX_SMUX_RX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_INT); + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + + if (bgx->use_training) { + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00); + /* training enable */ + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL, SPU_PMD_CRTL_TRAIN_EN); + } + + /* Append FCS to each packet */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, SMU_TX_APPEND_FCS_D); + + /* Disable forward error correction */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_FEC_CONTROL); + cfg &= ~SPU_FEC_CTL_FEC_EN; + bgx_reg_write(bgx, lmacid, BGX_SPUX_FEC_CONTROL, cfg); + + /* Disable autoneg */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_CONTROL); + cfg = cfg & ~(SPU_AN_CTL_AN_EN | SPU_AN_CTL_XNP_EN); + bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV); + if (bgx->lmac_type == BGX_MODE_10G_KR) + cfg |= (1 << 23); + else if (bgx->lmac_type == BGX_MODE_40G_KR) + cfg |= (1 << 24); + else + cfg &= ~((1 << 23) | (1 << 24)); + cfg = cfg & (~((1ULL << 25) | (1ULL << 22) | (1ULL << 12))); + bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_ADV, cfg); + + cfg = bgx_reg_read(bgx, 0, BGX_SPU_DBG_CONTROL); + cfg &= ~SPU_DBG_CTL_AN_ARB_LINK_CHK_EN; + bgx_reg_write(bgx, 0, BGX_SPU_DBG_CONTROL, cfg); + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, CMR_EN); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_CONTROL1); + cfg &= ~SPU_CTL_LOW_POWER; + bgx_reg_write(bgx, lmacid, BGX_SPUX_CONTROL1, cfg); + + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_CTL); + cfg &= ~SMU_TX_CTL_UNI_EN; + cfg |= SMU_TX_CTL_DIC_EN; + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg); + + /* take lmac_count into account */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1)); + /* max packet size */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_RX_JABBER, MAX_FRAME_SIZE); + + return 0; +} + +static int bgx_xaui_check_link(struct lmac *lmac) +{ + struct bgx *bgx = lmac->bgx; + int 
lmacid = lmac->lmacid; + int lmac_type = bgx->lmac_type; + u64 cfg; + + bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); + if (bgx->use_training) { + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + if (!(cfg & (1ull << 13))) { + cfg = (1ull << 13) | (1ull << 14); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL); + cfg |= (1ull << 0); + bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL, cfg); + return -1; + } + } + + /* wait for PCS to come out of reset */ + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET, true)) { + dev_err(&bgx->pdev->dev, "BGX SPU reset not completed\n"); + return -1; + } + + if ((lmac_type == BGX_MODE_10G_KR) || (lmac_type == BGX_MODE_XFI) || + (lmac_type == BGX_MODE_40G_KR) || (lmac_type == BGX_MODE_XLAUI)) { + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BR_STATUS1, + SPU_BR_STATUS_BLK_LOCK, false)) { + dev_err(&bgx->pdev->dev, + "SPU_BR_STATUS_BLK_LOCK not completed\n"); + return -1; + } + } else { + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_BX_STATUS, + SPU_BX_STATUS_RX_ALIGN, false)) { + dev_err(&bgx->pdev->dev, + "SPU_BX_STATUS_RX_ALIGN not completed\n"); + return -1; + } + } + + /* Clear rcvflt bit (latching high) and read it back */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { + dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); + if (bgx->use_training) { + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); + if (!(cfg & (1ull << 13))) { + cfg = (1ull << 13) | (1ull << 14); + bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); + cfg = bgx_reg_read(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL); + cfg |= (1ull << 0); + bgx_reg_write(bgx, lmacid, + BGX_SPUX_BR_PMD_CRTL, cfg); + return -1; + } + } + return -1; + } + + /* Wait for MAC RX to be ready */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL, + SMU_RX_CTL_STATUS, true)) { + dev_err(&bgx->pdev->dev, "SMU RX link not okay\n"); + return -1; + } + + /* Wait for BGX RX to be idle */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) { + dev_err(&bgx->pdev->dev, "SMU RX not idle\n"); + return -1; + } + + /* Wait for BGX TX to be idle */ + if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_TX_IDLE, false)) { + dev_err(&bgx->pdev->dev, "SMU TX not idle\n"); + return -1; + } + + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { + dev_err(&bgx->pdev->dev, "Receive fault\n"); + return -1; + } + + /* Receive link is latching low. Force it high and verify it */ + bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); + if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1, + SPU_STATUS1_RCV_LNK, false)) { + dev_err(&bgx->pdev->dev, "SPU receive link down\n"); + return -1; + } + + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); + cfg &= ~SPU_MISC_CTL_RX_DIS; + bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); + return 0; +} + +static void bgx_poll_for_link(struct work_struct *work) +{ + struct lmac *lmac; + u64 link; + + lmac = container_of(work, struct lmac, dwork.work); + + /* Receive link is latching low. 
Force it high and verify it */ + bgx_reg_modify(lmac->bgx, lmac->lmacid, + BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); + bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1, + SPU_STATUS1_RCV_LNK, false); + + link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); + if (link & SPU_STATUS1_RCV_LNK) { + lmac->link_up = 1; + if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) + lmac->last_speed = 40000; + else + lmac->last_speed = 10000; + lmac->last_duplex = 1; + } else { + lmac->link_up = 0; + } + + if (lmac->last_link != lmac->link_up) { + lmac->last_link = lmac->link_up; + if (lmac->link_up) + bgx_xaui_check_link(lmac); + } + + queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); +} + +static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) +{ + struct lmac *lmac; + u64 cfg; + + lmac = &bgx->lmac[lmacid]; + lmac->bgx = bgx; + + if (bgx->lmac_type == BGX_MODE_SGMII) { + lmac->is_sgmii = 1; + if (bgx_lmac_sgmii_init(bgx, lmacid)) + return -1; + } else { + lmac->is_sgmii = 0; + if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type)) + return -1; + } + + if (lmac->is_sgmii) { + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND); + cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */ + bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_APPEND, cfg); + bgx_reg_write(bgx, lmacid, BGX_GMP_GMI_TXX_MIN_PKT, 60 - 1); + } else { + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_APPEND); + cfg |= ((1ull << 2) | (1ull << 1)); /* FCS and PAD */ + bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_APPEND, cfg); + bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_MIN_PKT, 60 + 4); + } + + /* Enable lmac */ + bgx_reg_modify(bgx, lmacid, BGX_CMRX_CFG, + CMR_EN | CMR_PKT_RX_EN | CMR_PKT_TX_EN); + + /* Restore default cfg, incase low level firmware changed it */ + bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); + + if ((bgx->lmac_type != BGX_MODE_XFI) && + (bgx->lmac_type != BGX_MODE_XLAUI) && + (bgx->lmac_type != BGX_MODE_40G_KR) && + (bgx->lmac_type != BGX_MODE_10G_KR)) { + if (!lmac->phydev) + return -ENODEV; + + lmac->phydev->dev_flags = 0; + + if (phy_connect_direct(&lmac->netdev, lmac->phydev, + bgx_lmac_handler, + PHY_INTERFACE_MODE_SGMII)) + return -ENODEV; + + phy_start_aneg(lmac->phydev); + } else { + lmac->check_link = alloc_workqueue("check_link", WQ_UNBOUND | + WQ_MEM_RECLAIM, 1); + if (!lmac->check_link) + return -ENOMEM; + INIT_DELAYED_WORK(&lmac->dwork, bgx_poll_for_link); + queue_delayed_work(lmac->check_link, &lmac->dwork, 0); + } + + return 0; +} + +void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) +{ + struct lmac *lmac; + u64 cmrx_cfg; + + lmac = &bgx->lmac[lmacid]; + if (lmac->check_link) { + /* Destroy work queue */ + cancel_delayed_work(&lmac->dwork); + flush_workqueue(lmac->check_link); + destroy_workqueue(lmac->check_link); + } + + cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cmrx_cfg &= ~(1 << 15); + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); + bgx_flush_dmac_addrs(bgx, lmacid); + + if (lmac->phydev) + phy_disconnect(lmac->phydev); + + lmac->phydev = NULL; +} + +static void bgx_set_num_ports(struct bgx *bgx) +{ + u64 lmac_count; + + switch (bgx->qlm_mode) { + case QLM_MODE_SGMII: + bgx->lmac_count = 4; + bgx->lmac_type = BGX_MODE_SGMII; + bgx->lane_to_sds = 0; + break; + case QLM_MODE_XAUI_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_XAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_RXAUI_2X2: + bgx->lmac_count = 2; + bgx->lmac_type = BGX_MODE_RXAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_XFI_4X1: + bgx->lmac_count = 4; + bgx->lmac_type = 
BGX_MODE_XFI; + bgx->lane_to_sds = 0; + break; + case QLM_MODE_XLAUI_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_XLAUI; + bgx->lane_to_sds = 0xE4; + break; + case QLM_MODE_10G_KR_4X1: + bgx->lmac_count = 4; + bgx->lmac_type = BGX_MODE_10G_KR; + bgx->lane_to_sds = 0; + bgx->use_training = 1; + break; + case QLM_MODE_40G_KR4_1X4: + bgx->lmac_count = 1; + bgx->lmac_type = BGX_MODE_40G_KR; + bgx->lane_to_sds = 0xE4; + bgx->use_training = 1; + break; + default: + bgx->lmac_count = 0; + break; + } + + /* Check if low level firmware has programmed LMAC count + * based on board type, if yes consider that otherwise + * the default static values + */ + lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; + if (lmac_count != 4) + bgx->lmac_count = lmac_count; +} + +static void bgx_init_hw(struct bgx *bgx) +{ + int i; + + bgx_set_num_ports(bgx); + + bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP); + if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS)) + dev_err(&bgx->pdev->dev, "BGX%d BIST failed\n", bgx->bgx_id); + + /* Set lmac type and lane2serdes mapping */ + for (i = 0; i < bgx->lmac_count; i++) { + if (bgx->lmac_type == BGX_MODE_RXAUI) { + if (i) + bgx->lane_to_sds = 0x0e; + else + bgx->lane_to_sds = 0x04; + bgx_reg_write(bgx, i, BGX_CMRX_CFG, + (bgx->lmac_type << 8) | bgx->lane_to_sds); + continue; + } + bgx_reg_write(bgx, i, BGX_CMRX_CFG, + (bgx->lmac_type << 8) | (bgx->lane_to_sds + i)); + bgx->lmac[i].lmacid_bd = lmac_count; + lmac_count++; + } + + bgx_reg_write(bgx, 0, BGX_CMR_TX_LMACS, bgx->lmac_count); + bgx_reg_write(bgx, 0, BGX_CMR_RX_LMACS, bgx->lmac_count); + + /* Set the backpressure AND mask */ + for (i = 0; i < bgx->lmac_count; i++) + bgx_reg_modify(bgx, 0, BGX_CMR_CHAN_MSK_AND, + ((1ULL << MAX_BGX_CHANS_PER_LMAC) - 1) << + (i * MAX_BGX_CHANS_PER_LMAC)); + + /* Disable all MAC filtering */ + for (i = 0; i < RX_DMAC_COUNT; i++) + bgx_reg_write(bgx, 0, BGX_CMR_RX_DMACX_CAM + (i * 8), 0x00); + + /* Disable MAC steering (NCSI traffic) */ + for (i = 0; i < RX_TRAFFIC_STEER_RULE_COUNT; i++) + bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); +} + +static void bgx_get_qlm_mode(struct bgx *bgx) +{ + struct device *dev = &bgx->pdev->dev; + int lmac_type; + int train_en; + + /* Read LMAC0 type to figure out QLM mode + * This is configured by low level firmware + */ + lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); + lmac_type = (lmac_type >> 8) & 0x07; + + train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; + + switch (lmac_type) { + case BGX_MODE_SGMII: + bgx->qlm_mode = QLM_MODE_SGMII; + dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id); + break; + case BGX_MODE_XAUI: + bgx->qlm_mode = QLM_MODE_XAUI_1X4; + dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id); + break; + case BGX_MODE_RXAUI: + bgx->qlm_mode = QLM_MODE_RXAUI_2X2; + dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id); + break; + case BGX_MODE_XFI: + if (!train_en) { + bgx->qlm_mode = QLM_MODE_XFI_4X1; + dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id); + } else { + bgx->qlm_mode = QLM_MODE_10G_KR_4X1; + dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id); + } + break; + case BGX_MODE_XLAUI: + if (!train_en) { + bgx->qlm_mode = QLM_MODE_XLAUI_1X4; + dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id); + } else { + bgx->qlm_mode = QLM_MODE_40G_KR4_1X4; + dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id); + } + break; + default: + bgx->qlm_mode = QLM_MODE_SGMII; + dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id); + } +} + 
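For illustration only (not part of the patch): the per-LMAC values programmed by bgx_init_hw() above are plain bit arithmetic. The standalone C sketch below reproduces the BGX_CMRX_CFG encoding ((lmac_type << 8) | lane_to_sds) and the per-LMAC backpressure AND mask for the RXAUI case, reusing constants from thunder_bgx.h; the main() harness and the sample values are illustrative assumptions, not driver code.

/* Userspace sketch of the values bgx_init_hw() writes for QLM_MODE_RXAUI_2X2.
 * Constants mirror thunder_bgx.h; everything else here is illustrative.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_BGX_CHANS_PER_LMAC	16
#define BGX_MODE_RXAUI		2

int main(void)
{
	uint64_t lane_to_sds, cmrx_cfg, bp_mask;
	int i;

	for (i = 0; i < 2; i++) {	/* RXAUI: two LMACs per BGX */
		/* LMAC0 maps to SerDes lanes 0-1 (0x04), LMAC1 to lanes 2-3 (0x0e) */
		lane_to_sds = i ? 0x0e : 0x04;
		cmrx_cfg = ((uint64_t)BGX_MODE_RXAUI << 8) | lane_to_sds;

		/* 16 contiguous backpressure channels per LMAC */
		bp_mask = ((1ULL << MAX_BGX_CHANS_PER_LMAC) - 1) <<
			  (i * MAX_BGX_CHANS_PER_LMAC);

		printf("LMAC%d: CMRX_CFG=0x%llx bp_mask=0x%016llx\n", i,
		       (unsigned long long)cmrx_cfg,
		       (unsigned long long)bp_mask);
	}
	return 0;
}

For QLM_MODE_RXAUI_2X2 this prints CMRX_CFG values 0x204 and 0x20e with backpressure masks 0xffff and 0xffff0000, matching what the loop above writes to BGX_CMRX_CFG and BGX_CMR_CHAN_MSK_AND.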
+static void bgx_init_of(struct bgx *bgx, struct device_node *np) +{ + struct device_node *np_child; + u8 lmac = 0; + + for_each_child_of_node(np, np_child) { + struct device_node *phy_np; + const char *mac; + + phy_np = of_parse_phandle(np_child, "phy-handle", 0); + if (phy_np) + bgx->lmac[lmac].phydev = of_phy_find_device(phy_np); + + mac = of_get_mac_address(np_child); + if (mac) + ether_addr_copy(bgx->lmac[lmac].mac, mac); + + SET_NETDEV_DEV(&bgx->lmac[lmac].netdev, &bgx->pdev->dev); + bgx->lmac[lmac].lmacid = lmac; + lmac++; + if (lmac == MAX_LMAC_PER_BGX) + break; + } +} + +static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err; + struct device *dev = &pdev->dev; + struct bgx *bgx = NULL; + struct device_node *np; + char bgx_sel[5]; + u8 lmac; + + bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); + if (!bgx) + return -ENOMEM; + bgx->pdev = pdev; + + pci_set_drvdata(pdev, bgx); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + pci_set_drvdata(pdev, NULL); + return err; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + /* MAP configuration registers */ + bgx->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!bgx->reg_base) { + dev_err(dev, "BGX: Cannot map CSR memory space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id += NODE_ID(pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM)) + * MAX_BGX_PER_CN88XX; + bgx_vnic[bgx->bgx_id] = bgx; + bgx_get_qlm_mode(bgx); + + snprintf(bgx_sel, 5, "bgx%d", bgx->bgx_id); + np = of_find_node_by_name(NULL, bgx_sel); + if (np) + bgx_init_of(bgx, np); + + bgx_init_hw(bgx); + + /* Enable all LMACs */ + for (lmac = 0; lmac < bgx->lmac_count; lmac++) { + err = bgx_lmac_enable(bgx, lmac); + if (err) { + dev_err(dev, "BGX%d failed to enable lmac%d\n", + bgx->bgx_id, lmac); + goto err_enable; + } + } + + return 0; + +err_enable: + bgx_vnic[bgx->bgx_id] = NULL; +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); + return err; +} + +static void bgx_remove(struct pci_dev *pdev) +{ + struct bgx *bgx = pci_get_drvdata(pdev); + u8 lmac; + + /* Disable all LMACs */ + for (lmac = 0; lmac < bgx->lmac_count; lmac++) + bgx_lmac_disable(bgx, lmac); + + bgx_vnic[bgx->bgx_id] = NULL; + pci_release_regions(pdev); + pci_disable_device(pdev); + pci_set_drvdata(pdev, NULL); +} + +static struct pci_driver bgx_driver = { + .name = DRV_NAME, + .id_table = bgx_id_table, + .probe = bgx_probe, + .remove = bgx_remove, +}; + +static int __init bgx_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&bgx_driver); +} + +static void __exit bgx_cleanup_module(void) +{ + pci_unregister_driver(&bgx_driver); +} + +module_init(bgx_init_module); +module_exit(bgx_cleanup_module); diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h new file mode 100644 index 000000000000..9d91ce44f8d7 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -0,0 +1,223 @@ +/* + * Copyright (C) 2015 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. 
+ */ + +#ifndef THUNDER_BGX_H +#define THUNDER_BGX_H + +#define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ +#define MAX_BGX_PER_CN88XX 2 +#define MAX_LMAC_PER_BGX 4 +#define MAX_BGX_CHANS_PER_LMAC 16 +#define MAX_DMAC_PER_LMAC 8 +#define MAX_FRAME_SIZE 9216 + +#define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 + +#define MAX_LMAC (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX) + +#define NODE_ID_MASK 0x300000000000 +#define NODE_ID(x) ((x & NODE_ID_MASK) >> 44) + +/* Registers */ +#define BGX_CMRX_CFG 0x00 +#define CMR_PKT_TX_EN BIT_ULL(13) +#define CMR_PKT_RX_EN BIT_ULL(14) +#define CMR_EN BIT_ULL(15) +#define BGX_CMR_GLOBAL_CFG 0x08 +#define CMR_GLOBAL_CFG_FCS_STRIP BIT_ULL(6) +#define BGX_CMRX_RX_ID_MAP 0x60 +#define BGX_CMRX_RX_STAT0 0x70 +#define BGX_CMRX_RX_STAT1 0x78 +#define BGX_CMRX_RX_STAT2 0x80 +#define BGX_CMRX_RX_STAT3 0x88 +#define BGX_CMRX_RX_STAT4 0x90 +#define BGX_CMRX_RX_STAT5 0x98 +#define BGX_CMRX_RX_STAT6 0xA0 +#define BGX_CMRX_RX_STAT7 0xA8 +#define BGX_CMRX_RX_STAT8 0xB0 +#define BGX_CMRX_RX_STAT9 0xB8 +#define BGX_CMRX_RX_STAT10 0xC0 +#define BGX_CMRX_RX_BP_DROP 0xC8 +#define BGX_CMRX_RX_DMAC_CTL 0x0E8 +#define BGX_CMR_RX_DMACX_CAM 0x200 +#define RX_DMACX_CAM_EN BIT_ULL(48) +#define RX_DMACX_CAM_LMACID(x) (x << 49) +#define RX_DMAC_COUNT 32 +#define BGX_CMR_RX_STREERING 0x300 +#define RX_TRAFFIC_STEER_RULE_COUNT 8 +#define BGX_CMR_CHAN_MSK_AND 0x450 +#define BGX_CMR_BIST_STATUS 0x460 +#define BGX_CMR_RX_LMACS 0x468 +#define BGX_CMRX_TX_STAT0 0x600 +#define BGX_CMRX_TX_STAT1 0x608 +#define BGX_CMRX_TX_STAT2 0x610 +#define BGX_CMRX_TX_STAT3 0x618 +#define BGX_CMRX_TX_STAT4 0x620 +#define BGX_CMRX_TX_STAT5 0x628 +#define BGX_CMRX_TX_STAT6 0x630 +#define BGX_CMRX_TX_STAT7 0x638 +#define BGX_CMRX_TX_STAT8 0x640 +#define BGX_CMRX_TX_STAT9 0x648 +#define BGX_CMRX_TX_STAT10 0x650 +#define BGX_CMRX_TX_STAT11 0x658 +#define BGX_CMRX_TX_STAT12 0x660 +#define BGX_CMRX_TX_STAT13 0x668 +#define BGX_CMRX_TX_STAT14 0x670 +#define BGX_CMRX_TX_STAT15 0x678 +#define BGX_CMRX_TX_STAT16 0x680 +#define BGX_CMRX_TX_STAT17 0x688 +#define BGX_CMR_TX_LMACS 0x1000 + +#define BGX_SPUX_CONTROL1 0x10000 +#define SPU_CTL_LOW_POWER BIT_ULL(11) +#define SPU_CTL_RESET BIT_ULL(15) +#define BGX_SPUX_STATUS1 0x10008 +#define SPU_STATUS1_RCV_LNK BIT_ULL(2) +#define BGX_SPUX_STATUS2 0x10020 +#define SPU_STATUS2_RCVFLT BIT_ULL(10) +#define BGX_SPUX_BX_STATUS 0x10028 +#define SPU_BX_STATUS_RX_ALIGN BIT_ULL(12) +#define BGX_SPUX_BR_STATUS1 0x10030 +#define SPU_BR_STATUS_BLK_LOCK BIT_ULL(0) +#define SPU_BR_STATUS_RCV_LNK BIT_ULL(12) +#define BGX_SPUX_BR_PMD_CRTL 0x10068 +#define SPU_PMD_CRTL_TRAIN_EN BIT_ULL(1) +#define BGX_SPUX_BR_PMD_LP_CUP 0x10078 +#define BGX_SPUX_BR_PMD_LD_CUP 0x10088 +#define BGX_SPUX_BR_PMD_LD_REP 0x10090 +#define BGX_SPUX_FEC_CONTROL 0x100A0 +#define SPU_FEC_CTL_FEC_EN BIT_ULL(0) +#define SPU_FEC_CTL_ERR_EN BIT_ULL(1) +#define BGX_SPUX_AN_CONTROL 0x100C8 +#define SPU_AN_CTL_AN_EN BIT_ULL(12) +#define SPU_AN_CTL_XNP_EN BIT_ULL(13) +#define BGX_SPUX_AN_ADV 0x100D8 +#define BGX_SPUX_MISC_CONTROL 0x10218 +#define SPU_MISC_CTL_INTLV_RDISP BIT_ULL(10) +#define SPU_MISC_CTL_RX_DIS BIT_ULL(12) +#define BGX_SPUX_INT 0x10220 /* +(0..3) << 20 */ +#define BGX_SPUX_INT_W1S 0x10228 +#define BGX_SPUX_INT_ENA_W1C 0x10230 +#define BGX_SPUX_INT_ENA_W1S 0x10238 +#define BGX_SPU_DBG_CONTROL 0x10300 +#define SPU_DBG_CTL_AN_ARB_LINK_CHK_EN BIT_ULL(18) +#define SPU_DBG_CTL_AN_NONCE_MCT_DIS BIT_ULL(29) + +#define BGX_SMUX_RX_INT 0x20000 +#define BGX_SMUX_RX_JABBER 0x20030 +#define BGX_SMUX_RX_CTL 0x20048 +#define 
SMU_RX_CTL_STATUS (3ull << 0) +#define BGX_SMUX_TX_APPEND 0x20100 +#define SMU_TX_APPEND_FCS_D BIT_ULL(2) +#define BGX_SMUX_TX_MIN_PKT 0x20118 +#define BGX_SMUX_TX_INT 0x20140 +#define BGX_SMUX_TX_CTL 0x20178 +#define SMU_TX_CTL_DIC_EN BIT_ULL(0) +#define SMU_TX_CTL_UNI_EN BIT_ULL(1) +#define SMU_TX_CTL_LNK_STATUS (3ull << 4) +#define BGX_SMUX_TX_THRESH 0x20180 +#define BGX_SMUX_CTL 0x20200 +#define SMU_CTL_RX_IDLE BIT_ULL(0) +#define SMU_CTL_TX_IDLE BIT_ULL(1) + +#define BGX_GMP_PCS_MRX_CTL 0x30000 +#define PCS_MRX_CTL_RST_AN BIT_ULL(9) +#define PCS_MRX_CTL_PWR_DN BIT_ULL(11) +#define PCS_MRX_CTL_AN_EN BIT_ULL(12) +#define PCS_MRX_CTL_RESET BIT_ULL(15) +#define BGX_GMP_PCS_MRX_STATUS 0x30008 +#define PCS_MRX_STATUS_AN_CPT BIT_ULL(5) +#define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020 +#define BGX_GMP_PCS_SGM_AN_ADV 0x30068 +#define BGX_GMP_PCS_MISCX_CTL 0x30078 +#define PCS_MISC_CTL_GMX_ENO BIT_ULL(11) +#define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full +#define BGX_GMP_GMI_PRTX_CFG 0x38020 +#define GMI_PORT_CFG_SPEED BIT_ULL(1) +#define GMI_PORT_CFG_DUPLEX BIT_ULL(2) +#define GMI_PORT_CFG_SLOT_TIME BIT_ULL(3) +#define GMI_PORT_CFG_SPEED_MSB BIT_ULL(8) +#define BGX_GMP_GMI_RXX_JABBER 0x38038 +#define BGX_GMP_GMI_TXX_THRESH 0x38210 +#define BGX_GMP_GMI_TXX_APPEND 0x38218 +#define BGX_GMP_GMI_TXX_SLOT 0x38220 +#define BGX_GMP_GMI_TXX_BURST 0x38228 +#define BGX_GMP_GMI_TXX_MIN_PKT 0x38240 +#define BGX_GMP_GMI_TXX_SGMII_CTL 0x38300 + +#define BGX_MSIX_VEC_0_29_ADDR 0x400000 /* +(0..29) << 4 */ +#define BGX_MSIX_VEC_0_29_CTL 0x400008 +#define BGX_MSIX_PBA_0 0x4F0000 + +/* MSI-X interrupts */ +#define BGX_MSIX_VECTORS 30 +#define BGX_LMAC_VEC_OFFSET 7 +#define BGX_MSIX_VEC_SHIFT 4 + +#define CMRX_INT 0 +#define SPUX_INT 1 +#define SMUX_RX_INT 2 +#define SMUX_TX_INT 3 +#define GMPX_PCS_INT 4 +#define GMPX_GMI_RX_INT 5 +#define GMPX_GMI_TX_INT 6 +#define CMR_MEM_INT 28 +#define SPU_MEM_INT 29 + +#define LMAC_INTR_LINK_UP BIT(0) +#define LMAC_INTR_LINK_DOWN BIT(1) + +/* RX_DMAC_CTL configuration*/ +enum MCAST_MODE { + MCAST_MODE_REJECT, + MCAST_MODE_ACCEPT, + MCAST_MODE_CAM_FILTER, + RSVD +}; + +#define BCAST_ACCEPT 1 +#define CAM_ACCEPT 1 + +void bgx_add_dmac_addr(u64 dmac, int node, int bgx_idx, int lmac); +unsigned bgx_get_map(int node); +int bgx_get_lmac_count(int node, int bgx); +const char *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid); +void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const char *mac); +void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status); +u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx); +u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx); +#define BGX_RX_STATS_COUNT 11 +#define BGX_TX_STATS_COUNT 18 + +struct bgx_stats { + u64 rx_stats[BGX_RX_STATS_COUNT]; + u64 tx_stats[BGX_TX_STATS_COUNT]; +}; + +enum LMAC_TYPE { + BGX_MODE_SGMII = 0, /* 1 lane, 1.250 Gbaud */ + BGX_MODE_XAUI = 1, /* 4 lanes, 3.125 Gbaud */ + BGX_MODE_DXAUI = 1, /* 4 lanes, 6.250 Gbaud */ + BGX_MODE_RXAUI = 2, /* 2 lanes, 6.250 Gbaud */ + BGX_MODE_XFI = 3, /* 1 lane, 10.3125 Gbaud */ + BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */ + BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */ + BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */ +}; + +enum qlm_mode { + QLM_MODE_SGMII, /* SGMII, each lane independent */ + QLM_MODE_XAUI_1X4, /* 1 XAUI or DXAUI, 4 lanes */ + QLM_MODE_RXAUI_2X2, /* 2 RXAUI, 2 lanes each */ + QLM_MODE_XFI_4X1, /* 4 XFI, 1 lane each */ + QLM_MODE_XLAUI_1X4, /* 1 XLAUI, 4 lanes each */ + QLM_MODE_10G_KR_4X1, /* 4 10GBASE-KR, 1 lane each */ 
+ QLM_MODE_40G_KR4_1X4, /* 1 40GBASE-KR4, 4 lanes each */ +}; + +#endif /* THUNDER_BGX_H */
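As a closing illustration (again not part of the patch), the bgx_id computed in bgx_probe() combines the node number encoded in the BAR0 physical address with the BGX index inside that node, using NODE_ID() and MAX_BGX_PER_CN88XX from this header. A minimal sketch of that arithmetic, with a hypothetical BAR address:

/* Sketch of the bgx_id derivation in bgx_probe(); the BAR address below is
 * a made-up example, the macros are copied from thunder_bgx.h.
 */
#include <stdio.h>
#include <stdint.h>

#define MAX_BGX_PER_CN88XX	2
#define NODE_ID_MASK		0x300000000000ULL
#define NODE_ID(x)		(((x) & NODE_ID_MASK) >> 44)

int main(void)
{
	uint64_t bar_start = 0x100001000000ULL;	/* hypothetical BAR0 start */
	unsigned int bgx_id;

	bgx_id  = (bar_start >> 24) & 1;		    /* BGX index within the node */
	bgx_id += NODE_ID(bar_start) * MAX_BGX_PER_CN88XX;  /* offset by node number */

	printf("node=%llu bgx_id=%u\n",
	       (unsigned long long)NODE_ID(bar_start), bgx_id);
	return 0;
}

Here the sample address decodes to node 1, BGX 1, giving bgx_id 3; that is the bgx_vnic[] slot later used by bgx_get_rx_stats()/bgx_get_tx_stats() via (node * MAX_BGX_PER_CN88XX) + bgx_idx.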