From 6b9dbaff83d626e5a9cc0c01a2b302f8fe423a1a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 23 Oct 2023 15:19:49 +0200 Subject: wifi: remove orphaned ray_cs driver Aviator/Raytheon is an early PCMCIA driver, apparently predating 802.11b and only supporting wireless extensions. The driver has been orphaned since 2010 and only seen cosmetic updates long before than. Jean Tourrilhes pointed out in a 2005 changelog that he tested a change on actual hardware, which was apparently already noteworthy back then. Signed-off-by: Arnd Bergmann Signed-off-by: Kalle Valo --- .../networking/device_drivers/wifi/index.rst | 1 - .../networking/device_drivers/wifi/ray_cs.rst | 165 --------------------- 2 files changed, 166 deletions(-) delete mode 100644 Documentation/networking/device_drivers/wifi/ray_cs.rst (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/wifi/index.rst b/Documentation/networking/device_drivers/wifi/index.rst index bf91a87c7acf..fb394f5de4a9 100644 --- a/Documentation/networking/device_drivers/wifi/index.rst +++ b/Documentation/networking/device_drivers/wifi/index.rst @@ -10,7 +10,6 @@ Contents: intel/ipw2100 intel/ipw2200 - ray_cs .. only:: subproject and html diff --git a/Documentation/networking/device_drivers/wifi/ray_cs.rst b/Documentation/networking/device_drivers/wifi/ray_cs.rst deleted file mode 100644 index 9a46d1ae8f20..000000000000 --- a/Documentation/networking/device_drivers/wifi/ray_cs.rst +++ /dev/null @@ -1,165 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -.. include:: - -========================= -Raylink wireless LAN card -========================= - -September 21, 1999 - -Copyright |copy| 1998 Corey Thomas (corey@world.std.com) - -This file is the documentation for the Raylink Wireless LAN card driver for -Linux. The Raylink wireless LAN card is a PCMCIA card which provides IEEE -802.11 compatible wireless network connectivity at 1 and 2 megabits/second. -See http://www.raytheon.com/micro/raylink/ for more information on the Raylink -card. This driver is in early development and does have bugs. See the known -bugs and limitations at the end of this document for more information. -This driver also works with WebGear's Aviator 2.4 and Aviator Pro -wireless LAN cards. - -As of kernel 2.3.18, the ray_cs driver is part of the Linux kernel -source. My web page for the development of ray_cs is at -http://web.ralinktech.com/ralink/Home/Support/Linux.html -and I can be emailed at corey@world.std.com - -The kernel driver is based on ray_cs-1.62.tgz - -The driver at my web page is intended to be used as an add on to -David Hinds pcmcia package. All the command line parameters are -available when compiled as a module. When built into the kernel, only -the essid= string parameter is available via the kernel command line. -This will change after the method of sorting out parameters for all -the PCMCIA drivers is agreed upon. If you must have a built in driver -with nondefault parameters, they can be edited in -/usr/src/linux/drivers/net/pcmcia/ray_cs.c. Searching for module_param -will find them all. - -Information on card services is available at: - - http://pcmcia-cs.sourceforge.net/ - - -Card services user programs are still required for PCMCIA devices. -pcmcia-cs-3.1.1 or greater is required for the kernel version of -the driver. - -Currently, ray_cs is not part of David Hinds card services package, -so the following magic is required. - -At the end of the /etc/pcmcia/config.opts file, add the line: -source ./ray_cs.opts -This will make card services read the ray_cs.opts file -when starting. Create the file /etc/pcmcia/ray_cs.opts containing the -following:: - - #### start of /etc/pcmcia/ray_cs.opts ################### - # Configuration options for Raylink Wireless LAN PCMCIA card - device "ray_cs" - class "network" module "misc/ray_cs" - - card "RayLink PC Card WLAN Adapter" - manfid 0x01a6, 0x0000 - bind "ray_cs" - - module "misc/ray_cs" opts "" - #### end of /etc/pcmcia/ray_cs.opts ##################### - - -To join an existing network with -different parameters, contact the network administrator for the -configuration information, and edit /etc/pcmcia/ray_cs.opts. -Add the parameters below between the empty quotes. - -Parameters for ray_cs driver which may be specified in ray_cs.opts: - -=============== =============== ============================================= -bc integer 0 = normal mode (802.11 timing), - 1 = slow down inter frame timing to allow - operation with older breezecom access - points. - -beacon_period integer beacon period in Kilo-microseconds, - - legal values = must be integer multiple - of hop dwell - - default = 256 - -country integer 1 = USA (default), - 2 = Europe, - 3 = Japan, - 4 = Korea, - 5 = Spain, - 6 = France, - 7 = Israel, - 8 = Australia - -essid string ESS ID - network name to join - - string with maximum length of 32 chars - default value = "ADHOC_ESSID" - -hop_dwell integer hop dwell time in Kilo-microseconds - - legal values = 16,32,64,128(default),256 - -irq_mask integer linux standard 16 bit value 1bit/IRQ - - lsb is IRQ 0, bit 1 is IRQ 1 etc. - Used to restrict choice of IRQ's to use. - Recommended method for controlling - interrupts is in /etc/pcmcia/config.opts - -net_type integer 0 (default) = adhoc network, - 1 = infrastructure - -phy_addr string string containing new MAC address in - hex, must start with x eg - x00008f123456 - -psm integer 0 = continuously active, - 1 = power save mode (not useful yet) - -pc_debug integer (0-5) larger values for more verbose - logging. Replaces ray_debug. - -ray_debug integer Replaced with pc_debug - -ray_mem_speed integer defaults to 500 - -sniffer integer 0 = not sniffer (default), - 1 = sniffer which can be used to record all - network traffic using tcpdump or similar, - but no normal network use is allowed. - -translate integer 0 = no translation (encapsulate frames), - 1 = translation (RFC1042/802.1) -=============== =============== ============================================= - -More on sniffer mode: - -tcpdump does not understand 802.11 headers, so it can't -interpret the contents, but it can record to a file. This is only -useful for debugging 802.11 lowlevel protocols that are not visible to -linux. If you want to watch ftp xfers, or do similar things, you -don't need to use sniffer mode. Also, some packet types are never -sent up by the card, so you will never see them (ack, rts, cts, probe -etc.) There is a simple program (showcap) included in the ray_cs -package which parses the 802.11 headers. - -Known Problems and missing features - - Does not work with non x86 - - Does not work with SMP - - Support for defragmenting frames is not yet debugged, and in - fact is known to not work. I have never encountered a net set - up to fragment, but still, it should be fixed. - - The ioctl support is incomplete. The hardware address cannot be set - using ifconfig yet. If a different hardware address is needed, it may - be set using the phy_addr parameter in ray_cs.opts. This requires - a card insertion to take effect. -- cgit v1.3.1 From 045edee19d591e59ed53772bf6dfc9b1ed9577eb Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 6 Nov 2023 20:57:25 -0800 Subject: bpf: Introduce KF_ARG_PTR_TO_CONST_STR Similar to ARG_PTR_TO_CONST_STR for BPF helpers, KF_ARG_PTR_TO_CONST_STR specifies kfunc args that point to const strings. Annotation "__str" is used to specify kfunc arg of type KF_ARG_PTR_TO_CONST_STR. Also, add documentation for the "__str" annotation. bpf_get_file_xattr() will be the first kfunc that uses this type. Signed-off-by: Song Liu Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Vadim Fedorenko Link: https://lore.kernel.org/bpf/20231107045725.2278852-4-song@kernel.org Signed-off-by: Alexei Starovoitov --- Documentation/bpf/kfuncs.rst | 24 ++++++++++++++++++++++++ kernel/bpf/verifier.c | 19 +++++++++++++++++++ 2 files changed, 43 insertions(+) (limited to 'Documentation') diff --git a/Documentation/bpf/kfuncs.rst b/Documentation/bpf/kfuncs.rst index 723408e399ab..7985c6615f3c 100644 --- a/Documentation/bpf/kfuncs.rst +++ b/Documentation/bpf/kfuncs.rst @@ -135,6 +135,30 @@ Either way, the returned buffer is either NULL, or of size buffer_szk. Without t annotation, the verifier will reject the program if a null pointer is passed in with a nonzero size. +2.2.5 __str Annotation +---------------------------- +This annotation is used to indicate that the argument is a constant string. + +An example is given below:: + + __bpf_kfunc bpf_get_file_xattr(..., const char *name__str, ...) + { + ... + } + +In this case, ``bpf_get_file_xattr()`` can be called as:: + + bpf_get_file_xattr(..., "xattr_name", ...); + +Or:: + + const char name[] = "xattr_name"; /* This need to be global */ + int BPF_PROG(...) + { + ... + bpf_get_file_xattr(..., name, ...); + ... + } .. _BPF_kfunc_nodef: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 637f34231633..9276e0abcb4b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10810,6 +10810,11 @@ static bool is_kfunc_arg_nullable(const struct btf *btf, const struct btf_param return __kfunc_param_match_suffix(btf, arg, "__nullable"); } +static bool is_kfunc_arg_const_str(const struct btf *btf, const struct btf_param *arg) +{ + return __kfunc_param_match_suffix(btf, arg, "__str"); +} + static bool is_kfunc_arg_scalar_with_name(const struct btf *btf, const struct btf_param *arg, const char *name) @@ -10953,6 +10958,7 @@ enum kfunc_ptr_arg_type { KF_ARG_PTR_TO_RB_ROOT, KF_ARG_PTR_TO_RB_NODE, KF_ARG_PTR_TO_NULL, + KF_ARG_PTR_TO_CONST_STR, }; enum special_kfunc_type { @@ -11103,6 +11109,9 @@ get_kfunc_ptr_arg_type(struct bpf_verifier_env *env, if (is_kfunc_arg_rbtree_node(meta->btf, &args[argno])) return KF_ARG_PTR_TO_RB_NODE; + if (is_kfunc_arg_const_str(meta->btf, &args[argno])) + return KF_ARG_PTR_TO_CONST_STR; + if ((base_type(reg->type) == PTR_TO_BTF_ID || reg2btf_ids[base_type(reg->type)])) { if (!btf_type_is_struct(ref_t)) { verbose(env, "kernel function %s args#%d pointer type %s %s is not supported\n", @@ -11734,6 +11743,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ case KF_ARG_PTR_TO_MEM_SIZE: case KF_ARG_PTR_TO_CALLBACK: case KF_ARG_PTR_TO_REFCOUNTED_KPTR: + case KF_ARG_PTR_TO_CONST_STR: /* Trusted by default */ break; default: @@ -12005,6 +12015,15 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_ meta->arg_btf = reg->btf; meta->arg_btf_id = reg->btf_id; break; + case KF_ARG_PTR_TO_CONST_STR: + if (reg->type != PTR_TO_MAP_VALUE) { + verbose(env, "arg#%d doesn't point to a const string\n", i); + return -EINVAL; + } + ret = check_reg_const_str(env, reg, regno); + if (ret) + return ret; + break; } } -- cgit v1.3.1 From 0fbe92b9fd4d3287f291e8f1b03d63dc4a2f38a4 Mon Sep 17 00:00:00 2001 From: Christian Marangi Date: Tue, 14 Nov 2023 15:08:44 +0100 Subject: dt-bindings: Document Marvell Aquantia PHY Document bindings for Marvell Aquantia PHY. The Marvell Aquantia PHY require a firmware to work correctly and there at least 3 way to load this firmware. Describe all the different way and document the binding "firmware-name" to load the PHY firmware from userspace. Signed-off-by: Christian Marangi Reviewed-by: Conor Dooley Acked-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/marvell,aquantia.yaml | 116 +++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/marvell,aquantia.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/marvell,aquantia.yaml b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml new file mode 100644 index 000000000000..9854fab4c4db --- /dev/null +++ b/Documentation/devicetree/bindings/net/marvell,aquantia.yaml @@ -0,0 +1,116 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/marvell,aquantia.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell Aquantia Ethernet PHY + +maintainers: + - Christian Marangi + +description: | + Marvell Aquantia Ethernet PHY require a firmware to be loaded to actually + work. + + This can be done and is implemented by OEM in 3 different way: + - Attached SPI flash directly to the PHY with the firmware. The PHY + will self load the firmware in the presence of this configuration. + - Read from a dedicated partition on system NAND declared in an + NVMEM cell, and loaded to the PHY using its mailbox interface. + - Manually provided firmware loaded from a file in the filesystem. + +allOf: + - $ref: ethernet-phy.yaml# + +select: + properties: + compatible: + contains: + enum: + - ethernet-phy-id03a1.b445 + - ethernet-phy-id03a1.b460 + - ethernet-phy-id03a1.b4a2 + - ethernet-phy-id03a1.b4d0 + - ethernet-phy-id03a1.b4e0 + - ethernet-phy-id03a1.b5c2 + - ethernet-phy-id03a1.b4b0 + - ethernet-phy-id03a1.b662 + - ethernet-phy-id03a1.b712 + - ethernet-phy-id31c3.1c12 + required: + - compatible + +properties: + reg: + maxItems: 1 + + firmware-name: + description: specify the name of PHY firmware to load + + nvmem-cells: + description: phandle to the firmware nvmem cell + maxItems: 1 + + nvmem-cell-names: + const: firmware + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <0>; + firmware-name = "AQR-G4_v5.4.C-AQR_CIG_WF-1945_0x8_ID44776_VER1630.cld"; + }; + + ethernet-phy@1 { + compatible = "ethernet-phy-id31c3.1c12", + "ethernet-phy-ieee802.3-c45"; + + reg = <1>; + nvmem-cells = <&aqr_fw>; + nvmem-cell-names = "firmware"; + }; + }; + + flash { + compatible = "jedec,spi-nor"; + #address-cells = <1>; + #size-cells = <1>; + + partitions { + compatible = "fixed-partitions"; + #address-cells = <1>; + #size-cells = <1>; + + /* ... */ + + partition@650000 { + compatible = "nvmem-cells"; + label = "0:ethphyfw"; + reg = <0x650000 0x80000>; + read-only; + #address-cells = <1>; + #size-cells = <1>; + + aqr_fw: aqr_fw@0 { + reg = <0x0 0x5f42a>; + }; + }; + + /* ... */ + + }; + }; -- cgit v1.3.1 From 7c93d177d913a3a43bcb8f8edd4e2f78074a18a3 Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Wed, 15 Nov 2023 21:04:48 +0000 Subject: dt-bindings: net: renesas,etheravb: Document RZ/Five SoC The Gigabit Ethernet IP block on the RZ/Five SoC is identical to one found on the RZ/G2UL SoC. "renesas,r9a07g043-gbeth" compatible string will be used on the RZ/Five SoC so to make this clear and to keep this file consistent, update the comment to include RZ/Five SoC. No driver changes are required as generic compatible string "renesas,rzg2l-gbeth" will be used as a fallback on RZ/Five SoC. Signed-off-by: Lad Prabhakar Acked-by: Conor Dooley Reviewed-by: Sergey Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index 5d074f27d462..d3306b186000 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -55,7 +55,7 @@ properties: - items: - enum: - - renesas,r9a07g043-gbeth # RZ/G2UL + - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - renesas,r9a07g054-gbeth # RZ/V2L - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family -- cgit v1.3.1 From 11d55be06df0aedf19b05ab61c2d26b31a3c7e64 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:36 +0100 Subject: net: ethtool: Add a command to expose current time stamping layer Time stamping on network packets may happen either in the MAC or in the PHY, but not both. In preparation for making the choice selectable, expose both the current layers via ethtool. In accordance with the kernel implementation as it stands, the current layer will always read as "phy" when a PHY time stamping device is present. Future patches will allow changing the current layer administratively. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 23 ++++++++ include/uapi/linux/ethtool_netlink.h | 14 +++++ include/uapi/linux/net_tstamp.h | 10 ++++ net/ethtool/Makefile | 2 +- net/ethtool/common.h | 1 + net/ethtool/netlink.c | 10 ++++ net/ethtool/netlink.h | 2 + net/ethtool/ts.c | 88 ++++++++++++++++++++++++++++ 8 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/ts.c (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 2540c70952ff..644b3b764044 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -225,6 +225,7 @@ Userspace to kernel: ``ETHTOOL_MSG_RSS_GET`` get RSS settings ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters + ``ETHTOOL_MSG_TS_GET`` get current timestamping ===================================== ================================= Kernel to userspace: @@ -268,6 +269,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PSE_GET_REPLY`` PSE parameters ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status + ``ETHTOOL_MSG_TS_GET_REPLY`` current timestamping ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1994,6 +1996,26 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg +TS_GET +====== + +Gets current timestamping. + +Request contents: + + ================================= ====== ==================== + ``ETHTOOL_A_TS_HEADER`` nested request header + ================================= ====== ==================== + +Kernel response contents: + + ======================= ====== ============================== + ``ETHTOOL_A_TS_HEADER`` nested reply header + ``ETHTOOL_A_TS_LAYER`` u32 current timestamping + ======================= ====== ============================== + +This command get the current timestamp layer. + Request translation =================== @@ -2100,4 +2122,5 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` + n/a ``ETHTOOL_MSG_TS_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 73e2c10dc2cc..cb51136328cf 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_TS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_TS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -975,6 +977,18 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* TS LAYER */ + +enum { + ETHTOOL_A_TS_UNSPEC, + ETHTOOL_A_TS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TS_LAYER, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TS_CNT, + ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index df8091998c8d..4551fb3d7720 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,6 +13,16 @@ #include #include /* for SO_TIMESTAMPING */ +/* Layer of the TIMESTAMPING provider */ +enum timestamping_layer { + NO_TIMESTAMPING, + SOFTWARE_TIMESTAMPING, + MAC_TIMESTAMPING, + PHY_TIMESTAMPING, + + __TIMESTAMPING_COUNT, +}; + /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 504f954a1b28..4ea64c080639 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o + module.o pse-pd.o plca.o mm.o ts.o diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 28b8aaaf9bcb..a264b635f7d3 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -35,6 +35,7 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; +extern const char ts_layer_names[][ETH_GSTRING_LEN]; extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 3bbd5afb7b31..561c0931d055 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -306,6 +306,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, + [ETHTOOL_MSG_TS_GET] = ðnl_ts_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1128,6 +1129,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_ts_get_policy, + .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1..1e6085198acc 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -395,6 +395,7 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; +extern const struct ethnl_request_ops ethnl_ts_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -441,6 +442,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; +extern const struct nla_policy ethnl_ts_get_policy[ETHTOOL_A_TS_HEADER + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/ts.c b/net/ethtool/ts.c new file mode 100644 index 000000000000..066cb06f4d0b --- /dev/null +++ b/net/ethtool/ts.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct ts_req_info { + struct ethnl_req_info base; +}; + +struct ts_reply_data { + struct ethnl_reply_data base; + enum timestamping_layer ts_layer; +}; + +#define TS_REPDATA(__reply_base) \ + container_of(__reply_base, struct ts_reply_data, base) + +/* TS_GET */ +const struct nla_policy ethnl_ts_get_policy[] = { + [ETHTOOL_A_TS_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), +}; + +static int ts_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct ts_reply_data *data = TS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + if (phy_has_tsinfo(dev->phydev)) { + data->ts_layer = PHY_TIMESTAMPING; + } else if (ops->get_ts_info) { + struct ethtool_ts_info ts_info = {0}; + + ops->get_ts_info(dev, &ts_info); + if (ts_info.so_timestamping & + SOF_TIMESTAMPING_HARDWARE_MASK) + data->ts_layer = MAC_TIMESTAMPING; + + if (ts_info.so_timestamping & + SOF_TIMESTAMPING_SOFTWARE_MASK) + data->ts_layer = SOFTWARE_TIMESTAMPING; + } else { + data->ts_layer = NO_TIMESTAMPING; + } + + ethnl_ops_complete(dev); + + return ret; +} + +static int ts_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)); +} + +static int ts_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct ts_reply_data *data = TS_REPDATA(reply_base); + + return nla_put_u32(skb, ETHTOOL_A_TS_LAYER, data->ts_layer); +} + +const struct ethnl_request_ops ethnl_ts_request_ops = { + .request_cmd = ETHTOOL_MSG_TS_GET, + .reply_cmd = ETHTOOL_MSG_TS_GET_REPLY, + .hdr_attr = ETHTOOL_A_TS_HEADER, + .req_info_size = sizeof(struct ts_req_info), + .reply_data_size = sizeof(struct ts_reply_data), + + .prepare_data = ts_prepare_data, + .reply_size = ts_reply_size, + .fill_reply = ts_fill_reply, +}; -- cgit v1.3.1 From bb8645b00ced1db036adf9ad7d9baaf1890aa2e6 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:37 +0100 Subject: netlink: specs: Introduce new netlink command to get current timestamp Add a new commands allowing to get the current time stamping on a netdevice's link. Example usage : ./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema --do ts-get \ --json '{"header":{"dev-name":"eth0"}}' {'header': {'dev-index': 3, 'dev-name': 'eth0'}, 'ts-layer': 1} Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 5c7a65b009b4..61b30701fdad 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -939,6 +939,16 @@ attribute-sets: - name: burst-tmr type: u32 + - + name: ts + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: ts-layer + type: u32 operations: enum-model: directional @@ -1689,3 +1699,17 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get + - + name: ts-get + doc: Get current timestamp + + attribute-set: ts + + do: + request: + attributes: + - header + reply: + attributes: &ts + - header + - ts-layer -- cgit v1.3.1 From d905f9c753295ee5a30af265f4b724f10050e7d3 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:38 +0100 Subject: net: ethtool: Add a command to list available time stamping layers Introduce a new netlink message that lists all available time stamping layers on a given interface. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 23 +++++++++ include/uapi/linux/ethtool_netlink.h | 14 ++++++ net/ethtool/netlink.c | 10 ++++ net/ethtool/netlink.h | 1 + net/ethtool/ts.c | 73 ++++++++++++++++++++++++++++ 5 files changed, 121 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 644b3b764044..b8d00676ed82 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -226,6 +226,7 @@ Userspace to kernel: ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ``ETHTOOL_MSG_TS_GET`` get current timestamping + ``ETHTOOL_MSG_TS_LIST_GET`` list available timestampings ===================================== ================================= Kernel to userspace: @@ -270,6 +271,7 @@ Kernel to userspace: ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status ``ETHTOOL_MSG_TS_GET_REPLY`` current timestamping + ``ETHTOOL_MSG_TS_LIST_GET_REPLY`` available timestampings ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2016,6 +2018,26 @@ Kernel response contents: This command get the current timestamp layer. +TS_LIST_GET +=========== + +Get the list of available timestampings. + +Request contents: + + ================================= ====== ==================== + ``ETHTOOL_A_TS_HEADER`` nested request header + ================================= ====== ==================== + +Kernel response contents: + + =========================== ====== ============================== + ``ETHTOOL_A_TS_HEADER`` nested reply header + ``ETHTOOL_A_TS_LIST_LAYER`` binary available timestampings + =========================== ====== ============================== + +This command lists all the possible timestamp layer available. + Request translation =================== @@ -2123,4 +2145,5 @@ are netlink only. n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` n/a ``ETHTOOL_MSG_TS_GET`` + n/a ``ETHTOOL_MSG_TS_LIST_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index cb51136328cf..62b885d44d06 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -58,6 +58,7 @@ enum { ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_TS_GET, + ETHTOOL_MSG_TS_LIST_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -111,6 +112,7 @@ enum { ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, ETHTOOL_MSG_TS_GET_REPLY, + ETHTOOL_MSG_TS_LIST_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -989,6 +991,18 @@ enum { ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) }; +/* TS LIST LAYER */ + +enum { + ETHTOOL_A_TS_LIST_UNSPEC, + ETHTOOL_A_TS_LIST_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_TS_LIST_LAYER, /* array, u32 */ + + /* add new constants above here */ + __ETHTOOL_A_TS_LIST_CNT, + ETHTOOL_A_TS_LIST_MAX = (__ETHTOOL_A_TS_LIST_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 561c0931d055..842c9db1531f 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -307,6 +307,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, [ETHTOOL_MSG_TS_GET] = ðnl_ts_request_ops, + [ETHTOOL_MSG_TS_LIST_GET] = ðnl_ts_list_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1138,6 +1139,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_ts_get_policy, .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TS_LIST_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + .policy = ethnl_ts_get_policy, + .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 1e6085198acc..ea8c312db3af 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -396,6 +396,7 @@ extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; extern const struct ethnl_request_ops ethnl_ts_request_ops; +extern const struct ethnl_request_ops ethnl_ts_list_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; diff --git a/net/ethtool/ts.c b/net/ethtool/ts.c index 066cb06f4d0b..f2dd65a2e69c 100644 --- a/net/ethtool/ts.c +++ b/net/ethtool/ts.c @@ -86,3 +86,76 @@ const struct ethnl_request_ops ethnl_ts_request_ops = { .reply_size = ts_reply_size, .fill_reply = ts_fill_reply, }; + +/* TS_LIST_GET */ +struct ts_list_reply_data { + struct ethnl_reply_data base; + enum timestamping_layer ts_layer[__TIMESTAMPING_COUNT]; + u8 num_ts; +}; + +#define TS_LIST_REPDATA(__reply_base) \ + container_of(__reply_base, struct ts_list_reply_data, base) + +static int ts_list_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + const struct genl_info *info) +{ + struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const struct ethtool_ops *ops = dev->ethtool_ops; + int ret, i = 0; + + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + if (phy_has_tsinfo(dev->phydev)) + data->ts_layer[i++] = PHY_TIMESTAMPING; + if (ops->get_ts_info) { + struct ethtool_ts_info ts_info = {0}; + + ops->get_ts_info(dev, &ts_info); + if (ts_info.so_timestamping & + SOF_TIMESTAMPING_HARDWARE_MASK) + data->ts_layer[i++] = MAC_TIMESTAMPING; + + if (ts_info.so_timestamping & + SOF_TIMESTAMPING_SOFTWARE_MASK) + data->ts_layer[i++] = SOFTWARE_TIMESTAMPING; + } + + data->num_ts = i; + ethnl_ops_complete(dev); + + return ret; +} + +static int ts_list_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); + + return nla_total_size(sizeof(u32)) * data->num_ts; +} + +static int ts_list_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); + + return nla_put(skb, ETHTOOL_A_TS_LIST_LAYER, sizeof(u32) * data->num_ts, data->ts_layer); +} + +const struct ethnl_request_ops ethnl_ts_list_request_ops = { + .request_cmd = ETHTOOL_MSG_TS_LIST_GET, + .reply_cmd = ETHTOOL_MSG_TS_LIST_GET_REPLY, + .hdr_attr = ETHTOOL_A_TS_HEADER, + .req_info_size = sizeof(struct ts_req_info), + .reply_data_size = sizeof(struct ts_list_reply_data), + + .prepare_data = ts_list_prepare_data, + .reply_size = ts_list_reply_size, + .fill_reply = ts_list_fill_reply, +}; -- cgit v1.3.1 From aed5004ee7a0e6f198a6b26fb6a1aa21588a9539 Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:39 +0100 Subject: netlink: specs: Introduce new netlink command to list available time stamping layers Add a new commands allowing to list available time stamping layers on a netdevice's link. Example usage : ./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema \ --do ts-list-get \ --json '{"header":{"dev-name":"eth0"}}' {'header': {'dev-index': 3, 'dev-name': 'eth0'}, 'ts-list-layer': b'\x01\x00\x00\x00\x05\x00\x00\x00'} Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 61b30701fdad..591425f7fa76 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -949,6 +949,16 @@ attribute-sets: - name: ts-layer type: u32 + - + name: ts-list + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: ts-list-layer + type: binary operations: enum-model: directional @@ -1713,3 +1723,17 @@ operations: attributes: &ts - header - ts-layer + - + name: ts-list-get + doc: Get list of timestamp devices available on an interface + + attribute-set: ts-list + + do: + request: + attributes: + - header + reply: + attributes: + - header + - ts-list-layer -- cgit v1.3.1 From 152c75e1d00200edc4da1beb67dd099a462ea86b Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:43 +0100 Subject: net: ethtool: ts: Let the active time stamping layer be selectable Now that the current timestamp is saved in a variable lets add the ETHTOOL_MSG_TS_SET ethtool netlink socket to make it selectable. Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 17 +++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 8 +++ net/ethtool/netlink.h | 1 + net/ethtool/ts.c | 99 ++++++++++++++++++++++++++++ 5 files changed, 126 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index b8d00676ed82..530c1775e5f4 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -227,6 +227,7 @@ Userspace to kernel: ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ``ETHTOOL_MSG_TS_GET`` get current timestamping ``ETHTOOL_MSG_TS_LIST_GET`` list available timestampings + ``ETHTOOL_MSG_TS_SET`` set current timestamping ===================================== ================================= Kernel to userspace: @@ -2038,6 +2039,21 @@ Kernel response contents: This command lists all the possible timestamp layer available. +TS_SET +====== + +Modify the selected timestamping. + +Request contents: + + ======================= ====== =================== + ``ETHTOOL_A_TS_HEADER`` nested reply header + ``ETHTOOL_A_TS_LAYER`` u32 timestamping + ======================= ====== =================== + +This command set the timestamping with one that should be listed by the +TSLIST_GET command. + Request translation =================== @@ -2146,4 +2162,5 @@ are netlink only. n/a ``ETHTOOL_MSG_MM_SET`` n/a ``ETHTOOL_MSG_TS_GET`` n/a ``ETHTOOL_MSG_TS_LIST_GET`` + n/a ``ETHTOOL_MSG_TS_SET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 62b885d44d06..df6c4fcc62c1 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -59,6 +59,7 @@ enum { ETHTOOL_MSG_MM_SET, ETHTOOL_MSG_TS_GET, ETHTOOL_MSG_TS_LIST_GET, + ETHTOOL_MSG_TS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 842c9db1531f..8322bf71f80d 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -308,6 +308,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, [ETHTOOL_MSG_TS_GET] = ðnl_ts_request_ops, [ETHTOOL_MSG_TS_LIST_GET] = ðnl_ts_list_request_ops, + [ETHTOOL_MSG_TS_SET] = ðnl_ts_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1148,6 +1149,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_ts_get_policy, .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_TS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_default_set_doit, + .policy = ethnl_ts_set_policy, + .maxattr = ARRAY_SIZE(ethnl_ts_set_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ea8c312db3af..8fedf234b824 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -444,6 +444,7 @@ extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADE extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; extern const struct nla_policy ethnl_ts_get_policy[ETHTOOL_A_TS_HEADER + 1]; +extern const struct nla_policy ethnl_ts_set_policy[ETHTOOL_A_TS_MAX + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/ts.c b/net/ethtool/ts.c index bd219512b8de..357265e74e08 100644 --- a/net/ethtool/ts.c +++ b/net/ethtool/ts.c @@ -59,6 +59,102 @@ static int ts_fill_reply(struct sk_buff *skb, return nla_put_u32(skb, ETHTOOL_A_TS_LAYER, data->ts_layer); } +/* TS_SET */ +const struct nla_policy ethnl_ts_set_policy[] = { + [ETHTOOL_A_TS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_TS_LAYER] = NLA_POLICY_RANGE(NLA_U32, 0, + __TIMESTAMPING_COUNT - 1) +}; + +static int ethnl_set_ts_validate(struct ethnl_req_info *req_info, + struct genl_info *info) +{ + struct nlattr **tb = info->attrs; + const struct net_device_ops *ops = req_info->dev->netdev_ops; + + if (!ops->ndo_hwtstamp_set) + return -EOPNOTSUPP; + + if (!tb[ETHTOOL_A_TS_LAYER]) + return 0; + + return 1; +} + +static int ethnl_set_ts(struct ethnl_req_info *req_info, struct genl_info *info) +{ + struct net_device *dev = req_info->dev; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct kernel_hwtstamp_config config = {0}; + struct nlattr **tb = info->attrs; + enum timestamping_layer ts_layer; + bool mod = false; + int ret; + + ts_layer = dev->ts_layer; + ethnl_update_u32(&ts_layer, tb[ETHTOOL_A_TS_LAYER], &mod); + + if (!mod) + return 0; + + if (ts_layer == SOFTWARE_TIMESTAMPING) { + struct ethtool_ts_info ts_info = {0}; + + if (!ops->get_ts_info) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TS_LAYER], + "this net device cannot support timestamping"); + return -EINVAL; + } + + ops->get_ts_info(dev, &ts_info); + if ((ts_info.so_timestamping & + SOF_TIMESTAMPING_SOFTWARE_MASK) != + SOF_TIMESTAMPING_SOFTWARE_MASK) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TS_LAYER], + "this net device cannot support software timestamping"); + return -EINVAL; + } + } else if (ts_layer == MAC_TIMESTAMPING) { + struct ethtool_ts_info ts_info = {0}; + + if (!ops->get_ts_info) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TS_LAYER], + "this net device cannot support timestamping"); + return -EINVAL; + } + + ops->get_ts_info(dev, &ts_info); + if ((ts_info.so_timestamping & + SOF_TIMESTAMPING_HARDWARE_MASK) != + SOF_TIMESTAMPING_HARDWARE_MASK) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[ETHTOOL_A_TS_LAYER], + "this net device cannot support hardware timestamping"); + return -EINVAL; + } + } else if (ts_layer == PHY_TIMESTAMPING && !phy_has_tsinfo(dev->phydev)) { + NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_TS_LAYER], + "this phy device cannot support timestamping"); + return -EINVAL; + } + + /* Disable time stamping in the current layer. */ + if (netif_device_present(dev) && + (dev->ts_layer == PHY_TIMESTAMPING || + dev->ts_layer == MAC_TIMESTAMPING)) { + ret = dev_set_hwtstamp_phylib(dev, &config, info->extack); + if (ret < 0) + return ret; + } + + dev->ts_layer = ts_layer; + + return 1; +} + const struct ethnl_request_ops ethnl_ts_request_ops = { .request_cmd = ETHTOOL_MSG_TS_GET, .reply_cmd = ETHTOOL_MSG_TS_GET_REPLY, @@ -69,6 +165,9 @@ const struct ethnl_request_ops ethnl_ts_request_ops = { .prepare_data = ts_prepare_data, .reply_size = ts_reply_size, .fill_reply = ts_fill_reply, + + .set_validate = ethnl_set_ts_validate, + .set = ethnl_set_ts, }; /* TS_LIST_GET */ -- cgit v1.3.1 From ee60ea6be0d3a96e57edf07da923a9c0f27f37ce Mon Sep 17 00:00:00 2001 From: Kory Maincent Date: Tue, 14 Nov 2023 12:28:44 +0100 Subject: netlink: specs: Introduce time stamping set command Add a new commands allowing to set the time stamping. Example usage : ./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema \ --do ts-list-get \ --json '{"header":{"dev-name":"eth0"}}' {'header': {'dev-index': 3, 'dev-name': 'eth0'}, 'ts-list-layer': b'\x02\x00\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'} ./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema --do ts-set \ --json '{"header":{"dev-name":"eth0"}, "ts-layer":5}' none ./ynl/cli.py --spec netlink/specs/ethtool.yaml --no-schema --do ts-get \ --json '{"header":{"dev-name":"eth0"}}' {'header': {'dev-index': 3, 'dev-name': 'eth0'}, 'ts-layer': 5} Signed-off-by: Kory Maincent Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 591425f7fa76..06d9120543d3 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -1737,3 +1737,12 @@ operations: attributes: - header - ts-list-layer + - + name: ts-set + doc: Set the timestamp device + + attribute-set: ts + + do: + request: + attributes: *ts -- cgit v1.3.1 From bf6b200bc80d18480f8d0fb61e185bb0587e633c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Wed, 15 Nov 2023 13:17:14 +0100 Subject: devlink: Acquire device lock during reload command Device drivers register with devlink from their probe routines (under the device lock) by acquiring the devlink instance lock and calling devl_register(). Drivers that support a devlink reload usually implement the reload_{down, up}() operations in a similar fashion to their remove and probe routines, respectively. However, while the remove and probe routines are invoked with the device lock held, the reload operations are only invoked with the devlink instance lock held. It is therefore impossible for drivers to acquire the device lock from their reload operations, as this would result in lock inversion. The motivating use case for invoking the reload operations with the device lock held is in mlxsw which needs to trigger a PCI reset as part of the reload. The driver cannot call pci_reset_function() as this function acquires the device lock. Instead, it needs to call __pci_reset_function_locked which expects the device lock to be held. To that end, adjust devlink to always acquire the device lock before the devlink instance lock when performing a reload. Do that when reload is explicitly triggered by user space by specifying the 'DEVLINK_NL_FLAG_NEED_DEV_LOCK' flag in the pre_doit and post_doit operations of the reload command. A previous patch already handled the case where reload is invoked as part of netns dismantle. Signed-off-by: Ido Schimmel Reviewed-by: Jiri Pirko Signed-off-by: Petr Machata Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/netlink/specs/devlink.yaml | 4 ++-- net/devlink/netlink.c | 13 +++++++++++++ net/devlink/netlink_gen.c | 4 ++-- net/devlink/netlink_gen.h | 5 +++++ 4 files changed, 22 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 572d83a414d0..43067e1f63aa 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -1484,8 +1484,8 @@ operations: dont-validate: [ strict ] flags: [ admin-perm ] do: - pre: devlink-nl-pre-doit - post: devlink-nl-post-doit + pre: devlink-nl-pre-doit-dev-lock + post: devlink-nl-post-doit-dev-lock request: attributes: - bus-name diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 86f12531bf99..fa9afe3e6d9b 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -138,6 +138,12 @@ int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_PORT); } +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + return __devlink_nl_pre_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info) @@ -162,6 +168,13 @@ void devlink_nl_post_doit(const struct genl_split_ops *ops, __devlink_nl_post_doit(skb, info, 0); } +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info) +{ + __devlink_nl_post_doit(skb, info, DEVLINK_NL_FLAG_NEED_DEV_LOCK); +} + static int devlink_nl_inst_single_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags, devlink_nl_dump_one_func_t *dump_one, diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 788dfdc498a9..95f9b4350ab7 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -846,9 +846,9 @@ const struct genl_split_ops devlink_nl_ops[73] = { { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_dev_lock, .doit = devlink_nl_reload_doit, - .post_doit = devlink_nl_post_doit, + .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 0e9e89c31c31..02f3c0bfae0e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -22,12 +22,17 @@ int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +void +devlink_nl_post_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); -- cgit v1.3.1 From 289354f21b2c3fac93e956efd45f256a88a4d997 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sat, 18 Nov 2023 18:38:05 -0800 Subject: net: partial revert of the "Make timestamping selectable: series Revert following commits: commit acec05fb78ab ("net_tstamp: Add TIMESTAMPING SOFTWARE and HARDWARE mask") commit 11d55be06df0 ("net: ethtool: Add a command to expose current time stamping layer") commit bb8645b00ced ("netlink: specs: Introduce new netlink command to get current timestamp") commit d905f9c75329 ("net: ethtool: Add a command to list available time stamping layers") commit aed5004ee7a0 ("netlink: specs: Introduce new netlink command to list available time stamping layers") commit 51bdf3165f01 ("net: Replace hwtstamp_source by timestamping layer") commit 0f7f463d4821 ("net: Change the API of PHY default timestamp to MAC") commit 091fab122869 ("net: ethtool: ts: Update GET_TS to reply the current selected timestamp") commit 152c75e1d002 ("net: ethtool: ts: Let the active time stamping layer be selectable") commit ee60ea6be0d3 ("netlink: specs: Introduce time stamping set command") They need more time for reviews. Link: https://lore.kernel.org/all/20231118183529.6e67100c@kernel.org/ Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 57 ----- Documentation/networking/ethtool-netlink.rst | 63 ------ .../net/ethernet/microchip/lan966x/lan966x_main.c | 6 +- drivers/net/phy/bcm-phy-ptp.c | 3 - drivers/net/phy/dp83640.c | 3 - drivers/net/phy/micrel.c | 6 - drivers/net/phy/mscc/mscc_ptp.c | 2 - drivers/net/phy/nxp-c45-tja11xx.c | 3 - drivers/net/phy/phy_device.c | 37 ---- include/linux/net_tstamp.h | 11 +- include/linux/netdevice.h | 5 - include/linux/phy.h | 4 - include/uapi/linux/ethtool_netlink.h | 29 --- include/uapi/linux/net_tstamp.h | 18 -- net/core/dev.c | 3 - net/core/dev_ioctl.c | 36 ++- net/core/timestamping.c | 10 - net/ethtool/Makefile | 2 +- net/ethtool/common.c | 19 +- net/ethtool/common.h | 1 - net/ethtool/netlink.c | 28 --- net/ethtool/netlink.h | 4 - net/ethtool/ts.c | 244 --------------------- 23 files changed, 28 insertions(+), 566 deletions(-) delete mode 100644 net/ethtool/ts.c (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 06d9120543d3..5c7a65b009b4 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -939,26 +939,6 @@ attribute-sets: - name: burst-tmr type: u32 - - - name: ts - attributes: - - - name: header - type: nest - nested-attributes: header - - - name: ts-layer - type: u32 - - - name: ts-list - attributes: - - - name: header - type: nest - nested-attributes: header - - - name: ts-list-layer - type: binary operations: enum-model: directional @@ -1709,40 +1689,3 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get - - - name: ts-get - doc: Get current timestamp - - attribute-set: ts - - do: - request: - attributes: - - header - reply: - attributes: &ts - - header - - ts-layer - - - name: ts-list-get - doc: Get list of timestamp devices available on an interface - - attribute-set: ts-list - - do: - request: - attributes: - - header - reply: - attributes: - - header - - ts-list-layer - - - name: ts-set - doc: Set the timestamp device - - attribute-set: ts - - do: - request: - attributes: *ts diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 530c1775e5f4..2540c70952ff 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -225,9 +225,6 @@ Userspace to kernel: ``ETHTOOL_MSG_RSS_GET`` get RSS settings ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters - ``ETHTOOL_MSG_TS_GET`` get current timestamping - ``ETHTOOL_MSG_TS_LIST_GET`` list available timestampings - ``ETHTOOL_MSG_TS_SET`` set current timestamping ===================================== ================================= Kernel to userspace: @@ -271,8 +268,6 @@ Kernel to userspace: ``ETHTOOL_MSG_PSE_GET_REPLY`` PSE parameters ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status - ``ETHTOOL_MSG_TS_GET_REPLY`` current timestamping - ``ETHTOOL_MSG_TS_LIST_GET_REPLY`` available timestampings ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -1999,61 +1994,6 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg -TS_GET -====== - -Gets current timestamping. - -Request contents: - - ================================= ====== ==================== - ``ETHTOOL_A_TS_HEADER`` nested request header - ================================= ====== ==================== - -Kernel response contents: - - ======================= ====== ============================== - ``ETHTOOL_A_TS_HEADER`` nested reply header - ``ETHTOOL_A_TS_LAYER`` u32 current timestamping - ======================= ====== ============================== - -This command get the current timestamp layer. - -TS_LIST_GET -=========== - -Get the list of available timestampings. - -Request contents: - - ================================= ====== ==================== - ``ETHTOOL_A_TS_HEADER`` nested request header - ================================= ====== ==================== - -Kernel response contents: - - =========================== ====== ============================== - ``ETHTOOL_A_TS_HEADER`` nested reply header - ``ETHTOOL_A_TS_LIST_LAYER`` binary available timestampings - =========================== ====== ============================== - -This command lists all the possible timestamp layer available. - -TS_SET -====== - -Modify the selected timestamping. - -Request contents: - - ======================= ====== =================== - ``ETHTOOL_A_TS_HEADER`` nested reply header - ``ETHTOOL_A_TS_LAYER`` u32 timestamping - ======================= ====== =================== - -This command set the timestamping with one that should be listed by the -TSLIST_GET command. - Request translation =================== @@ -2160,7 +2100,4 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` - n/a ``ETHTOOL_MSG_TS_GET`` - n/a ``ETHTOOL_MSG_TS_LIST_GET`` - n/a ``ETHTOOL_MSG_TS_SET`` =================================== ===================================== diff --git a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c index fbe56b1bb386..2635ef8958c8 100644 --- a/drivers/net/ethernet/microchip/lan966x/lan966x_main.c +++ b/drivers/net/ethernet/microchip/lan966x/lan966x_main.c @@ -470,15 +470,15 @@ static int lan966x_port_hwtstamp_set(struct net_device *dev, struct lan966x_port *port = netdev_priv(dev); int err; - if (cfg->source != MAC_TIMESTAMPING && - cfg->source != PHY_TIMESTAMPING) + if (cfg->source != HWTSTAMP_SOURCE_NETDEV && + cfg->source != HWTSTAMP_SOURCE_PHYLIB) return -EOPNOTSUPP; err = lan966x_ptp_setup_traps(port, cfg); if (err) return err; - if (cfg->source == MAC_TIMESTAMPING) { + if (cfg->source == HWTSTAMP_SOURCE_NETDEV) { if (!port->lan966x->ptp) return -EOPNOTSUPP; diff --git a/drivers/net/phy/bcm-phy-ptp.c b/drivers/net/phy/bcm-phy-ptp.c index d3e825c951ee..617d384d4551 100644 --- a/drivers/net/phy/bcm-phy-ptp.c +++ b/drivers/net/phy/bcm-phy-ptp.c @@ -931,9 +931,6 @@ struct bcm_ptp_private *bcm_ptp_probe(struct phy_device *phydev) return ERR_CAST(clock); priv->ptp_clock = clock; - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; - priv->phydev = phydev; bcm_ptp_init(priv); diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index 64fd1a109c0f..5c42c47dc564 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1450,9 +1450,6 @@ static int dp83640_probe(struct phy_device *phydev) phydev->mii_ts = &dp83640->mii_ts; phydev->priv = dp83640; - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; - spin_lock_init(&dp83640->rx_lock); skb_queue_head_init(&dp83640->rx_queue); skb_queue_head_init(&dp83640->tx_queue); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 2b8dd0131926..bd4cd082662f 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -3158,9 +3158,6 @@ static void lan8814_ptp_init(struct phy_device *phydev) ptp_priv->mii_ts.ts_info = lan8814_ts_info; phydev->mii_ts = &ptp_priv->mii_ts; - - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; } static int lan8814_ptp_probe_once(struct phy_device *phydev) @@ -4589,9 +4586,6 @@ static int lan8841_probe(struct phy_device *phydev) phydev->mii_ts = &ptp_priv->mii_ts; - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; - return 0; } diff --git a/drivers/net/phy/mscc/mscc_ptp.c b/drivers/net/phy/mscc/mscc_ptp.c index fd174eb06d4a..eb0b032cb613 100644 --- a/drivers/net/phy/mscc/mscc_ptp.c +++ b/drivers/net/phy/mscc/mscc_ptp.c @@ -1570,8 +1570,6 @@ int vsc8584_ptp_probe(struct phy_device *phydev) return PTR_ERR(vsc8531->load_save); } - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; vsc8531->ptp->phydev = phydev; return 0; diff --git a/drivers/net/phy/nxp-c45-tja11xx.c b/drivers/net/phy/nxp-c45-tja11xx.c index 0515c7b979db..780ad353cf55 100644 --- a/drivers/net/phy/nxp-c45-tja11xx.c +++ b/drivers/net/phy/nxp-c45-tja11xx.c @@ -1658,9 +1658,6 @@ static int nxp_c45_probe(struct phy_device *phydev) priv->mii_ts.ts_info = nxp_c45_ts_info; phydev->mii_ts = &priv->mii_ts; ret = nxp_c45_init_ptp_clock(priv); - - /* Timestamp selected by default to keep legacy API */ - phydev->default_timestamp = true; } else { phydev_dbg(phydev, "PTP support not enabled even if the phy supports it"); } diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8c4794631daa..2ce74593d6e4 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1411,26 +1411,6 @@ int phy_sfp_probe(struct phy_device *phydev, } EXPORT_SYMBOL(phy_sfp_probe); -/** - * phy_set_timestamp - set the default selected timestamping device - * @dev: Pointer to net_device - * @phydev: Pointer to phy_device - * - * This is used to set default timestamping device taking into account - * the new API choice, which is selecting the timestamping from MAC by - * default if the phydev does not have default_timestamp flag enabled. - */ -static void phy_set_timestamp(struct net_device *dev, struct phy_device *phydev) -{ - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!phy_has_tsinfo(phydev)) - return; - - if (!ops->get_ts_info || phydev->default_timestamp) - dev->ts_layer = PHY_TIMESTAMPING; -} - /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach @@ -1504,7 +1484,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, phydev->phy_link_change = phy_link_change; if (dev) { - phy_set_timestamp(dev, phydev); phydev->attached_dev = dev; dev->phydev = phydev; @@ -1833,22 +1812,6 @@ void phy_detach(struct phy_device *phydev) phy_suspend(phydev); if (dev) { - const struct ethtool_ops *ops = dev->ethtool_ops; - struct ethtool_ts_info ts_info = {0}; - - if (ops->get_ts_info) { - ops->get_ts_info(dev, &ts_info); - if ((ts_info.so_timestamping & - SOF_TIMESTAMPING_HARDWARE_MASK) == - SOF_TIMESTAMPING_HARDWARE_MASK) - dev->ts_layer = MAC_TIMESTAMPING; - else if ((ts_info.so_timestamping & - SOF_TIMESTAMPING_SOFTWARE_MASK) == - SOF_TIMESTAMPING_SOFTWARE_MASK) - dev->ts_layer = SOFTWARE_TIMESTAMPING; - } else { - dev->ts_layer = NO_TIMESTAMPING; - } phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; } diff --git a/include/linux/net_tstamp.h b/include/linux/net_tstamp.h index bb289c2ad376..eb01c37e71e0 100644 --- a/include/linux/net_tstamp.h +++ b/include/linux/net_tstamp.h @@ -5,6 +5,11 @@ #include +enum hwtstamp_source { + HWTSTAMP_SOURCE_NETDEV, + HWTSTAMP_SOURCE_PHYLIB, +}; + /** * struct kernel_hwtstamp_config - Kernel copy of struct hwtstamp_config * @@ -15,8 +20,8 @@ * a legacy implementation of a lower driver * @copied_to_user: request was passed to a legacy implementation which already * copied the ioctl request back to user space - * @source: indication whether timestamps should come from software, the netdev - * or from an attached phylib PHY + * @source: indication whether timestamps should come from the netdev or from + * an attached phylib PHY * * Prefer using this structure for in-kernel processing of hardware * timestamping configuration, over the inextensible struct hwtstamp_config @@ -28,7 +33,7 @@ struct kernel_hwtstamp_config { int rx_filter; struct ifreq *ifr; bool copied_to_user; - enum timestamping_layer source; + enum hwtstamp_source source; }; static inline void hwtstamp_config_to_kernel(struct kernel_hwtstamp_config *kernel_cfg, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f020d2790c12..2d840d7056f2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -47,7 +47,6 @@ #include #include #include -#include #include #include #include @@ -2075,8 +2074,6 @@ enum netdev_ml_priv_type { * * @dpll_pin: Pointer to the SyncE source pin of a DPLL subsystem, * where the clock is recovered. - * @ts_layer: Tracks which network device - * performs packet time stamping. * * FIXME: cleanup struct net_device such that network protocol info * moves out. @@ -2438,8 +2435,6 @@ struct net_device { #if IS_ENABLED(CONFIG_DPLL) struct dpll_pin *dpll_pin; #endif - - enum timestamping_layer ts_layer; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/include/linux/phy.h b/include/linux/phy.h index 317def2a7843..e5f1f41e399c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -604,8 +604,6 @@ struct macsec_ops; * handling shall be postponed until PHY has resumed * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended, * requiring a rerun of the interrupt handler after resume - * @default_timestamp: Flag indicating whether we are using the phy - * timestamp as the default one * @interface: enum phy_interface_t value * @skb: Netlink message for cable diagnostics * @nest: Netlink nest used for cable diagnostics @@ -669,8 +667,6 @@ struct phy_device { unsigned irq_suspended:1; unsigned irq_rerun:1; - unsigned default_timestamp:1; - int rate_matching; enum phy_state state; diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index df6c4fcc62c1..73e2c10dc2cc 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,9 +57,6 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_TS_GET, - ETHTOOL_MSG_TS_LIST_GET, - ETHTOOL_MSG_TS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -112,8 +109,6 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_TS_GET_REPLY, - ETHTOOL_MSG_TS_LIST_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -980,30 +975,6 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -/* TS LAYER */ - -enum { - ETHTOOL_A_TS_UNSPEC, - ETHTOOL_A_TS_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TS_LAYER, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TS_CNT, - ETHTOOL_A_TS_MAX = (__ETHTOOL_A_TS_CNT - 1) -}; - -/* TS LIST LAYER */ - -enum { - ETHTOOL_A_TS_LIST_UNSPEC, - ETHTOOL_A_TS_LIST_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_TS_LIST_LAYER, /* array, u32 */ - - /* add new constants above here */ - __ETHTOOL_A_TS_LIST_CNT, - ETHTOOL_A_TS_LIST_MAX = (__ETHTOOL_A_TS_LIST_CNT - 1) -}; - /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index 4551fb3d7720..a2c66b3d7f0f 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -13,16 +13,6 @@ #include #include /* for SO_TIMESTAMPING */ -/* Layer of the TIMESTAMPING provider */ -enum timestamping_layer { - NO_TIMESTAMPING, - SOFTWARE_TIMESTAMPING, - MAC_TIMESTAMPING, - PHY_TIMESTAMPING, - - __TIMESTAMPING_COUNT, -}; - /* SO_TIMESTAMPING flags */ enum { SOF_TIMESTAMPING_TX_HARDWARE = (1<<0), @@ -58,14 +48,6 @@ enum { SOF_TIMESTAMPING_TX_SCHED | \ SOF_TIMESTAMPING_TX_ACK) -#define SOF_TIMESTAMPING_SOFTWARE_MASK (SOF_TIMESTAMPING_RX_SOFTWARE | \ - SOF_TIMESTAMPING_TX_SOFTWARE | \ - SOF_TIMESTAMPING_SOFTWARE) - -#define SOF_TIMESTAMPING_HARDWARE_MASK (SOF_TIMESTAMPING_RX_HARDWARE | \ - SOF_TIMESTAMPING_TX_HARDWARE | \ - SOF_TIMESTAMPING_RAW_HARDWARE) - /** * struct so_timestamping - SO_TIMESTAMPING parameter * diff --git a/net/core/dev.c b/net/core/dev.c index 05ce00632892..af53f6d838ce 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10212,9 +10212,6 @@ int register_netdevice(struct net_device *dev) dev->rtnl_link_state == RTNL_LINK_INITIALIZED) rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL, 0, NULL); - if (dev->ethtool_ops->get_ts_info) - dev->ts_layer = MAC_TIMESTAMPING; - out: return ret; diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index bc8be9749376..9a66cf5015f2 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -259,7 +259,9 @@ static int dev_eth_ioctl(struct net_device *dev, * @dev: Network device * @cfg: Timestamping configuration structure * - * Helper for calling the selected hardware provider timestamping. + * Helper for enforcing a common policy that phylib timestamping, if available, + * should take precedence in front of hardware timestamping provided by the + * netdev. * * Note: phy_mii_ioctl() only handles SIOCSHWTSTAMP (not SIOCGHWTSTAMP), and * there only exists a phydev->mii_ts->hwtstamp() method. So this will return @@ -269,14 +271,10 @@ static int dev_eth_ioctl(struct net_device *dev, static int dev_get_hwtstamp_phylib(struct net_device *dev, struct kernel_hwtstamp_config *cfg) { - enum timestamping_layer ts_layer = dev->ts_layer; - - if (ts_layer == PHY_TIMESTAMPING) + if (phy_has_hwtstamp(dev->phydev)) return phy_hwtstamp_get(dev->phydev, cfg); - else if (ts_layer == MAC_TIMESTAMPING) - return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); - return -EOPNOTSUPP; + return dev->netdev_ops->ndo_hwtstamp_get(dev, cfg); } static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) @@ -317,8 +315,9 @@ static int dev_get_hwtstamp(struct net_device *dev, struct ifreq *ifr) * @cfg: Timestamping configuration structure * @extack: Netlink extended ack message structure, for error reporting * - * Helper for calling the selected hardware provider timestamping. - * If the netdev driver needs to perform specific actions even for PHY + * Helper for enforcing a common policy that phylib timestamping, if available, + * should take precedence in front of hardware timestamping provided by the + * netdev. If the netdev driver needs to perform specific actions even for PHY * timestamping to work properly (a switch port must trap the timestamped * frames and not forward them), it must set IFF_SEE_ALL_HWTSTAMP_REQUESTS in * dev->priv_flags. @@ -328,26 +327,20 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, struct netlink_ext_ack *extack) { const struct net_device_ops *ops = dev->netdev_ops; - enum timestamping_layer ts_layer = dev->ts_layer; + bool phy_ts = phy_has_hwtstamp(dev->phydev); struct kernel_hwtstamp_config old_cfg = {}; bool changed = false; int err; - cfg->source = ts_layer; - - if (ts_layer != PHY_TIMESTAMPING && - ts_layer != MAC_TIMESTAMPING) - return -EOPNOTSUPP; + cfg->source = phy_ts ? HWTSTAMP_SOURCE_PHYLIB : HWTSTAMP_SOURCE_NETDEV; - if (ts_layer == PHY_TIMESTAMPING && - dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS) { + if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { err = ops->ndo_hwtstamp_get(dev, &old_cfg); if (err) return err; } - if (ts_layer == MAC_TIMESTAMPING || - dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS) { + if (!phy_ts || (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) { err = ops->ndo_hwtstamp_set(dev, cfg, extack); if (err) { if (extack->_msg) @@ -356,11 +349,10 @@ int dev_set_hwtstamp_phylib(struct net_device *dev, } } - if (ts_layer == PHY_TIMESTAMPING && - dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS) + if (phy_ts && (dev->priv_flags & IFF_SEE_ALL_HWTSTAMP_REQUESTS)) changed = kernel_hwtstamp_config_changed(&old_cfg, cfg); - if (ts_layer == PHY_TIMESTAMPING) { + if (phy_ts) { err = phy_hwtstamp_set(dev->phydev, cfg, extack); if (err) { if (changed) diff --git a/net/core/timestamping.c b/net/core/timestamping.c index 5cf51a523fb3..04840697fe79 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -21,7 +21,6 @@ static unsigned int classify(const struct sk_buff *skb) void skb_clone_tx_timestamp(struct sk_buff *skb) { - enum timestamping_layer ts_layer; struct mii_timestamper *mii_ts; struct sk_buff *clone; unsigned int type; @@ -29,10 +28,6 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) if (!skb->sk) return; - ts_layer = skb->dev->ts_layer; - if (ts_layer != PHY_TIMESTAMPING) - return; - type = classify(skb); if (type == PTP_CLASS_NONE) return; @@ -49,17 +44,12 @@ EXPORT_SYMBOL_GPL(skb_clone_tx_timestamp); bool skb_defer_rx_timestamp(struct sk_buff *skb) { - enum timestamping_layer ts_layer; struct mii_timestamper *mii_ts; unsigned int type; if (!skb->dev || !skb->dev->phydev || !skb->dev->phydev->mii_ts) return false; - ts_layer = skb->dev->ts_layer; - if (ts_layer != PHY_TIMESTAMPING) - return false; - if (skb_headroom(skb) < ETH_HLEN) return false; diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 4ea64c080639..504f954a1b28 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o ts.o + module.o pse-pd.o plca.o mm.o diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 9f6e3b2c74e2..11d8797f63f6 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -633,28 +633,13 @@ int __ethtool_get_ts_info(struct net_device *dev, struct ethtool_ts_info *info) { const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; - enum timestamping_layer ts_layer; - int ret; memset(info, 0, sizeof(*info)); info->cmd = ETHTOOL_GET_TS_INFO; - ts_layer = dev->ts_layer; - if (ts_layer == SOFTWARE_TIMESTAMPING) { - ret = ops->get_ts_info(dev, info); - if (ret) - return ret; - info->so_timestamping &= ~SOF_TIMESTAMPING_HARDWARE_MASK; - info->phc_index = -1; - info->rx_filters = 0; - info->tx_types = 0; - return 0; - } - - if (ts_layer == PHY_TIMESTAMPING) + if (phy_has_tsinfo(phydev)) return phy_ts_info(phydev, info); - - if (ts_layer == MAC_TIMESTAMPING) + if (ops->get_ts_info) return ops->get_ts_info(dev, info); info->so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | diff --git a/net/ethtool/common.h b/net/ethtool/common.h index a264b635f7d3..28b8aaaf9bcb 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -35,7 +35,6 @@ extern const char wol_mode_names[][ETH_GSTRING_LEN]; extern const char sof_timestamping_names[][ETH_GSTRING_LEN]; extern const char ts_tx_type_names[][ETH_GSTRING_LEN]; extern const char ts_rx_filter_names[][ETH_GSTRING_LEN]; -extern const char ts_layer_names[][ETH_GSTRING_LEN]; extern const char udp_tunnel_type_names[][ETH_GSTRING_LEN]; int __ethtool_get_link(struct net_device *dev); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 8322bf71f80d..3bbd5afb7b31 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -306,9 +306,6 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_PLCA_GET_STATUS] = ðnl_plca_status_request_ops, [ETHTOOL_MSG_MM_GET] = ðnl_mm_request_ops, [ETHTOOL_MSG_MM_SET] = ðnl_mm_request_ops, - [ETHTOOL_MSG_TS_GET] = ðnl_ts_request_ops, - [ETHTOOL_MSG_TS_LIST_GET] = ðnl_ts_list_request_ops, - [ETHTOOL_MSG_TS_SET] = ðnl_ts_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -1131,31 +1128,6 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, - { - .cmd = ETHTOOL_MSG_TS_GET, - .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, - .policy = ethnl_ts_get_policy, - .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, - }, - { - .cmd = ETHTOOL_MSG_TS_LIST_GET, - .doit = ethnl_default_doit, - .start = ethnl_default_start, - .dumpit = ethnl_default_dumpit, - .done = ethnl_default_done, - .policy = ethnl_ts_get_policy, - .maxattr = ARRAY_SIZE(ethnl_ts_get_policy) - 1, - }, - { - .cmd = ETHTOOL_MSG_TS_SET, - .flags = GENL_UNS_ADMIN_PERM, - .doit = ethnl_default_set_doit, - .policy = ethnl_ts_set_policy, - .maxattr = ARRAY_SIZE(ethnl_ts_set_policy) - 1, - }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 8fedf234b824..9a333a8d04c1 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -395,8 +395,6 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; -extern const struct ethnl_request_ops ethnl_ts_request_ops; -extern const struct ethnl_request_ops ethnl_ts_list_request_ops; extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; @@ -443,8 +441,6 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; -extern const struct nla_policy ethnl_ts_get_policy[ETHTOOL_A_TS_HEADER + 1]; -extern const struct nla_policy ethnl_ts_set_policy[ETHTOOL_A_TS_MAX + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/ts.c b/net/ethtool/ts.c deleted file mode 100644 index 357265e74e08..000000000000 --- a/net/ethtool/ts.c +++ /dev/null @@ -1,244 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only - -#include -#include - -#include "netlink.h" -#include "common.h" -#include "bitset.h" - -struct ts_req_info { - struct ethnl_req_info base; -}; - -struct ts_reply_data { - struct ethnl_reply_data base; - enum timestamping_layer ts_layer; -}; - -#define TS_REPDATA(__reply_base) \ - container_of(__reply_base, struct ts_reply_data, base) - -/* TS_GET */ -const struct nla_policy ethnl_ts_get_policy[] = { - [ETHTOOL_A_TS_HEADER] = - NLA_POLICY_NESTED(ethnl_header_policy), -}; - -static int ts_prepare_data(const struct ethnl_req_info *req_base, - struct ethnl_reply_data *reply_base, - const struct genl_info *info) -{ - struct ts_reply_data *data = TS_REPDATA(reply_base); - struct net_device *dev = reply_base->dev; - int ret; - - ret = ethnl_ops_begin(dev); - if (ret < 0) - return ret; - - data->ts_layer = dev->ts_layer; - - ethnl_ops_complete(dev); - - return ret; -} - -static int ts_reply_size(const struct ethnl_req_info *req_base, - const struct ethnl_reply_data *reply_base) -{ - return nla_total_size(sizeof(u32)); -} - -static int ts_fill_reply(struct sk_buff *skb, - const struct ethnl_req_info *req_base, - const struct ethnl_reply_data *reply_base) -{ - struct ts_reply_data *data = TS_REPDATA(reply_base); - - return nla_put_u32(skb, ETHTOOL_A_TS_LAYER, data->ts_layer); -} - -/* TS_SET */ -const struct nla_policy ethnl_ts_set_policy[] = { - [ETHTOOL_A_TS_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), - [ETHTOOL_A_TS_LAYER] = NLA_POLICY_RANGE(NLA_U32, 0, - __TIMESTAMPING_COUNT - 1) -}; - -static int ethnl_set_ts_validate(struct ethnl_req_info *req_info, - struct genl_info *info) -{ - struct nlattr **tb = info->attrs; - const struct net_device_ops *ops = req_info->dev->netdev_ops; - - if (!ops->ndo_hwtstamp_set) - return -EOPNOTSUPP; - - if (!tb[ETHTOOL_A_TS_LAYER]) - return 0; - - return 1; -} - -static int ethnl_set_ts(struct ethnl_req_info *req_info, struct genl_info *info) -{ - struct net_device *dev = req_info->dev; - const struct ethtool_ops *ops = dev->ethtool_ops; - struct kernel_hwtstamp_config config = {0}; - struct nlattr **tb = info->attrs; - enum timestamping_layer ts_layer; - bool mod = false; - int ret; - - ts_layer = dev->ts_layer; - ethnl_update_u32(&ts_layer, tb[ETHTOOL_A_TS_LAYER], &mod); - - if (!mod) - return 0; - - if (ts_layer == SOFTWARE_TIMESTAMPING) { - struct ethtool_ts_info ts_info = {0}; - - if (!ops->get_ts_info) { - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_TS_LAYER], - "this net device cannot support timestamping"); - return -EINVAL; - } - - ops->get_ts_info(dev, &ts_info); - if ((ts_info.so_timestamping & - SOF_TIMESTAMPING_SOFTWARE_MASK) != - SOF_TIMESTAMPING_SOFTWARE_MASK) { - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_TS_LAYER], - "this net device cannot support software timestamping"); - return -EINVAL; - } - } else if (ts_layer == MAC_TIMESTAMPING) { - struct ethtool_ts_info ts_info = {0}; - - if (!ops->get_ts_info) { - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_TS_LAYER], - "this net device cannot support timestamping"); - return -EINVAL; - } - - ops->get_ts_info(dev, &ts_info); - if ((ts_info.so_timestamping & - SOF_TIMESTAMPING_HARDWARE_MASK) != - SOF_TIMESTAMPING_HARDWARE_MASK) { - NL_SET_ERR_MSG_ATTR(info->extack, - tb[ETHTOOL_A_TS_LAYER], - "this net device cannot support hardware timestamping"); - return -EINVAL; - } - } else if (ts_layer == PHY_TIMESTAMPING && !phy_has_tsinfo(dev->phydev)) { - NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_TS_LAYER], - "this phy device cannot support timestamping"); - return -EINVAL; - } - - /* Disable time stamping in the current layer. */ - if (netif_device_present(dev) && - (dev->ts_layer == PHY_TIMESTAMPING || - dev->ts_layer == MAC_TIMESTAMPING)) { - ret = dev_set_hwtstamp_phylib(dev, &config, info->extack); - if (ret < 0) - return ret; - } - - dev->ts_layer = ts_layer; - - return 1; -} - -const struct ethnl_request_ops ethnl_ts_request_ops = { - .request_cmd = ETHTOOL_MSG_TS_GET, - .reply_cmd = ETHTOOL_MSG_TS_GET_REPLY, - .hdr_attr = ETHTOOL_A_TS_HEADER, - .req_info_size = sizeof(struct ts_req_info), - .reply_data_size = sizeof(struct ts_reply_data), - - .prepare_data = ts_prepare_data, - .reply_size = ts_reply_size, - .fill_reply = ts_fill_reply, - - .set_validate = ethnl_set_ts_validate, - .set = ethnl_set_ts, -}; - -/* TS_LIST_GET */ -struct ts_list_reply_data { - struct ethnl_reply_data base; - enum timestamping_layer ts_layer[__TIMESTAMPING_COUNT]; - u8 num_ts; -}; - -#define TS_LIST_REPDATA(__reply_base) \ - container_of(__reply_base, struct ts_list_reply_data, base) - -static int ts_list_prepare_data(const struct ethnl_req_info *req_base, - struct ethnl_reply_data *reply_base, - const struct genl_info *info) -{ - struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); - struct net_device *dev = reply_base->dev; - const struct ethtool_ops *ops = dev->ethtool_ops; - int ret, i = 0; - - ret = ethnl_ops_begin(dev); - if (ret < 0) - return ret; - - if (phy_has_tsinfo(dev->phydev)) - data->ts_layer[i++] = PHY_TIMESTAMPING; - if (ops->get_ts_info) { - struct ethtool_ts_info ts_info = {0}; - - ops->get_ts_info(dev, &ts_info); - if (ts_info.so_timestamping & - SOF_TIMESTAMPING_HARDWARE_MASK) - data->ts_layer[i++] = MAC_TIMESTAMPING; - - if (ts_info.so_timestamping & - SOF_TIMESTAMPING_SOFTWARE_MASK) - data->ts_layer[i++] = SOFTWARE_TIMESTAMPING; - } - - data->num_ts = i; - ethnl_ops_complete(dev); - - return ret; -} - -static int ts_list_reply_size(const struct ethnl_req_info *req_base, - const struct ethnl_reply_data *reply_base) -{ - struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); - - return nla_total_size(sizeof(u32)) * data->num_ts; -} - -static int ts_list_fill_reply(struct sk_buff *skb, - const struct ethnl_req_info *req_base, - const struct ethnl_reply_data *reply_base) -{ - struct ts_list_reply_data *data = TS_LIST_REPDATA(reply_base); - - return nla_put(skb, ETHTOOL_A_TS_LIST_LAYER, sizeof(u32) * data->num_ts, data->ts_layer); -} - -const struct ethnl_request_ops ethnl_ts_list_request_ops = { - .request_cmd = ETHTOOL_MSG_TS_LIST_GET, - .reply_cmd = ETHTOOL_MSG_TS_LIST_GET_REPLY, - .hdr_attr = ETHTOOL_A_TS_HEADER, - .req_info_size = sizeof(struct ts_req_info), - .reply_data_size = sizeof(struct ts_list_reply_data), - - .prepare_data = ts_list_prepare_data, - .reply_size = ts_list_reply_size, - .fill_reply = ts_list_fill_reply, -}; -- cgit v1.3.1 From 5e63c5ef7a99d4cc13ddb4964bdeaff45c0364a0 Mon Sep 17 00:00:00 2001 From: Radhey Shyam Pandey Date: Thu, 16 Nov 2023 00:26:51 +0530 Subject: dt-bindings: net: xlnx,axi-ethernet: Introduce DMA support Xilinx 1G/2.5G Ethernet Subsystem provides 32-bit AXI4-Stream buses to move transmit and receive Ethernet data to and from the subsystem. These buses are designed to be used with an AXI Direct Memory Access(DMA) IP or AXI Multichannel Direct Memory Access (MCDMA) IP core, AXI4-Stream Data FIFO, or any other custom logic in any supported device. Primary high-speed DMA data movement between system memory and stream target is through the AXI4 Read Master to AXI4 memory-mapped to stream (MM2S) Master, and AXI stream to memory-mapped (S2MM) Slave to AXI4 Write Master. AXI DMA/MCDMA enables channel of data movement on both MM2S and S2MM paths in scatter/gather mode. AXI DMA has two channels where as MCDMA has 16 Tx and 16 Rx channels. To uniquely identify each channel use 'chan' suffix. Depending on the usecase AXI ethernet driver can request any combination of multichannel DMA channels using generic dmas, dma-names properties. Example: dma-names = tx_chan0, rx_chan0, tx_chan1, rx_chan1; Signed-off-by: Radhey Shyam Pandey Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/1700074613-1977070-2-git-send-email-radhey.shyam.pandey@amd.com Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/xlnx,axi-ethernet.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml index 1d33d80af11c..bbe89ea9590c 100644 --- a/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml +++ b/Documentation/devicetree/bindings/net/xlnx,axi-ethernet.yaml @@ -122,6 +122,20 @@ properties: and "phy-handle" should point to an external PHY if exists. maxItems: 1 + dmas: + minItems: 2 + maxItems: 32 + description: TX and RX DMA channel phandle + + dma-names: + items: + pattern: "^[tr]x_chan([0-9]|1[0-5])$" + description: + Should be "tx_chan0", "tx_chan1" ... "tx_chan15" for DMA Tx channel + Should be "rx_chan0", "rx_chan1" ... "rx_chan15" for DMA Rx channel + minItems: 2 + maxItems: 32 + required: - compatible - interrupts @@ -143,6 +157,8 @@ examples: clocks = <&axi_clk>, <&axi_clk>, <&pl_enet_ref_clk>, <&mgt_clk>; phy-mode = "mii"; reg = <0x40c00000 0x40000>,<0x50c00000 0x40000>; + dmas = <&xilinx_dma 0>, <&xilinx_dma 1>; + dma-names = "tx_chan0", "rx_chan0"; xlnx,rxcsum = <0x2>; xlnx,rxmem = <0x800>; xlnx,txcsum = <0x2>; -- cgit v1.3.1 From 0807dc76f3bf500f9a22465eedd2290da7357efb Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Fri, 17 Nov 2023 02:38:10 -0800 Subject: octeon_ep: support Octeon CN10K devices Add PCI Endpoint NIC support for Octeon CN10K devices. CN10K devices are part of Octeon 10 family products with similar PCI NIC characteristics. These include: - CN10KA - CNF10KA - CNF10KB - CN10KB Update supported device list in Documentation Signed-off-by: Shinas Rasheed Link: https://lore.kernel.org/r/20231117103817.2468176-1-srasheed@marvell.com Signed-off-by: Paolo Abeni --- .../device_drivers/ethernet/marvell/octeon_ep.rst | 4 + drivers/net/ethernet/marvell/octeon_ep/Makefile | 3 +- .../net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c | 886 +++++++++++++++++++++ .../net/ethernet/marvell/octeon_ep/octep_main.c | 20 + .../net/ethernet/marvell/octeon_ep/octep_main.h | 6 + .../marvell/octeon_ep/octep_regs_cnxk_pf.h | 400 ++++++++++ 6 files changed, 1318 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst index cad96c8d1f97..613a818d5db6 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst @@ -24,6 +24,10 @@ Supported Devices Currently, this driver support following devices: * Network controller: Cavium, Inc. Device b200 * Network controller: Cavium, Inc. Device b400 + * Network controller: Cavium, Inc. Device b900 + * Network controller: Cavium, Inc. Device ba00 + * Network controller: Cavium, Inc. Device bc00 + * Network controller: Cavium, Inc. Device bd00 Interface Control ================= diff --git a/drivers/net/ethernet/marvell/octeon_ep/Makefile b/drivers/net/ethernet/marvell/octeon_ep/Makefile index 2026c8118158..02a4a21bc298 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/Makefile +++ b/drivers/net/ethernet/marvell/octeon_ep/Makefile @@ -6,4 +6,5 @@ obj-$(CONFIG_OCTEON_EP) += octeon_ep.o octeon_ep-y := octep_main.o octep_cn9k_pf.o octep_tx.o octep_rx.o \ - octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o + octep_ethtool.o octep_ctrl_mbox.o octep_ctrl_net.o \ + octep_cnxk_pf.o diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c new file mode 100644 index 000000000000..abb03e9119e7 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cnxk_pf.c @@ -0,0 +1,886 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include +#include + +#include "octep_config.h" +#include "octep_main.h" +#include "octep_regs_cnxk_pf.h" + +/* We will support 128 pf's in control mbox */ +#define CTRL_MBOX_MAX_PF 128 +#define CTRL_MBOX_SZ ((size_t)(0x400000 / CTRL_MBOX_MAX_PF)) + +/* Names of Hardware non-queue generic interrupts */ +static char *cnxk_non_ioq_msix_names[] = { + "epf_ire_rint", + "epf_ore_rint", + "epf_vfire_rint", + "epf_rsvd0", + "epf_vfore_rint", + "epf_rsvd1", + "epf_mbox_rint", + "epf_rsvd2_0", + "epf_rsvd2_1", + "epf_dma_rint", + "epf_dma_vf_rint", + "epf_rsvd3", + "epf_pp_vf_rint", + "epf_rsvd3", + "epf_misc_rint", + "epf_rsvd5", + /* Next 16 are for OEI_RINT */ + "epf_oei_rint0", + "epf_oei_rint1", + "epf_oei_rint2", + "epf_oei_rint3", + "epf_oei_rint4", + "epf_oei_rint5", + "epf_oei_rint6", + "epf_oei_rint7", + "epf_oei_rint8", + "epf_oei_rint9", + "epf_oei_rint10", + "epf_oei_rint11", + "epf_oei_rint12", + "epf_oei_rint13", + "epf_oei_rint14", + "epf_oei_rint15", + /* IOQ interrupt */ + "octeon_ep" +}; + +/* Dump useful hardware CSRs for debug purpose */ +static void cnxk_dump_regs(struct octep_device *oct, int qno) +{ + struct device *dev = &oct->pdev->dev; + + dev_info(dev, "IQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(qno))); + dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(qno))); + dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(qno))); + dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(qno))); + dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INSTR_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(qno))); + dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_CNTS(qno))); + dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(qno))); + dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_IN_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(qno))); + + dev_info(dev, "OQ-%d register dump\n", qno); + dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_DBELL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(qno))); + dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_CONTROL(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(qno))); + dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_ENABLE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_BADDR(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(qno))); + dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_SLIST_RSIZE(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(qno))); + dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_CNTS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_CNTS(qno))); + dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_INT_LEVELS(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(qno))); + dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_PKT_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(qno))); + dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_OUT_BYTE_CNT(qno), + octep_read_csr64(oct, CNXK_SDP_R_OUT_BYTE_CNT(qno))); + dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n", + qno, CNXK_SDP_R_ERR_TYPE(qno), + octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(qno))); +} + +/* Reset Hardware Tx queue */ +static int cnxk_reset_iq(struct octep_device *oct, int q_no) +{ + struct octep_config *conf = oct->conf; + u64 val = 0ULL; + + dev_dbg(&oct->pdev->dev, "Reset PF IQ-%d\n", q_no); + + /* Get absolute queue number */ + q_no += conf->pf_ring_cfg.srn; + + /* Disable the Tx/Instruction Ring */ + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(q_no), val); + + /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ + octep_write_csr64(oct, CNXK_SDP_R_IN_CNTS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_PKT_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_BYTE_CNT(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(q_no), val); + + val = 0xFFFFFFFF; + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(q_no), val); + + return 0; +} + +/* Reset Hardware Rx queue */ +static void cnxk_reset_oq(struct octep_device *oct, int q_no) +{ + u64 val = 0ULL; + + q_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + /* Disable Output (Rx) Ring */ + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(q_no), val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(q_no), val); + + /* Clear count CSRs */ + val = octep_read_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no)); + octep_write_csr(oct, CNXK_SDP_R_OUT_CNTS(q_no), val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); +} + +/* Reset all hardware Tx/Rx queues */ +static void octep_reset_io_queues_cnxk_pf(struct octep_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + int q; + + dev_dbg(&pdev->dev, "Reset OCTEP_CNXK PF IO Queues\n"); + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + cnxk_reset_iq(oct, q); + cnxk_reset_oq(oct, q); + } +} + +/* Initialize windowed addresses to access some hardware registers */ +static void octep_setup_pci_window_regs_cnxk_pf(struct octep_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + + oct->pci_win_regs.pci_win_wr_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_ADDR64); + oct->pci_win_regs.pci_win_rd_addr = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_ADDR64); + oct->pci_win_regs.pci_win_wr_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_WR_DATA64); + oct->pci_win_regs.pci_win_rd_data = (u8 __iomem *)(bar0_pciaddr + CNXK_SDP_WIN_RD_DATA64); +} + +/* Configure Hardware mapping: inform hardware which rings belong to PF. */ +static void octep_configure_ring_mapping_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u64 pf_srn = CFG_GET_PORTS_PF_SRN(oct->conf); + int q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(conf); q++) { + u64 regval = 0; + + if (oct->pcie_port) + regval = 8 << CNXK_SDP_FUNC_SEL_EPF_BIT_POS; + + octep_write_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q), regval); + + regval = octep_read_csr64(oct, CNXK_SDP_EPVF_RING(pf_srn + q)); + dev_dbg(&pdev->dev, "Write SDP_EPVF_RING[0x%llx] = 0x%llx\n", + CNXK_SDP_EPVF_RING(pf_srn + q), regval); + } +} + +/* Initialize configuration limits and initial active config */ +static void octep_init_config_cnxk_pf(struct octep_device *oct) +{ + struct octep_config *conf = oct->conf; + struct pci_dev *pdev = oct->pdev; + u8 link = 0; + u64 val; + int pos; + + /* Read ring configuration: + * PF ring count, number of VFs and rings per VF supported + */ + val = octep_read_csr64(oct, CNXK_SDP_EPF_RINFO); + dev_info(&pdev->dev, "SDP_EPF_RINFO[0x%x]:0x%llx\n", CNXK_SDP_EPF_RINFO, val); + conf->sriov_cfg.max_rings_per_vf = CNXK_SDP_EPF_RINFO_RPVF(val); + conf->sriov_cfg.active_rings_per_vf = conf->sriov_cfg.max_rings_per_vf; + conf->sriov_cfg.max_vfs = CNXK_SDP_EPF_RINFO_NVFS(val); + conf->sriov_cfg.active_vfs = conf->sriov_cfg.max_vfs; + conf->sriov_cfg.vf_srn = CNXK_SDP_EPF_RINFO_SRN(val); + + val = octep_read_csr64(oct, CNXK_SDP_MAC_PF_RING_CTL(oct->pcie_port)); + dev_info(&pdev->dev, "SDP_MAC_PF_RING_CTL[%d]:0x%llx\n", oct->pcie_port, val); + conf->pf_ring_cfg.srn = CNXK_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CNXK_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n", + conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf, + conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings); + + conf->iq.num_descs = OCTEP_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_64BYTE_INSTR; + conf->iq.db_min = OCTEP_DB_MIN; + conf->iq.intr_threshold = OCTEP_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_OQ_INTR_TIME_THRESHOLD; + + conf->msix_cfg.non_ioq_msix = CNXK_NUM_NON_IOQ_INTR; + conf->msix_cfg.ioq_msix = conf->pf_ring_cfg.active_io_rings; + conf->msix_cfg.non_ioq_msix_names = cnxk_non_ioq_msix_names; + + pos = pci_find_ext_capability(oct->pdev, PCI_EXT_CAP_ID_SRIOV); + if (pos) { + pci_read_config_byte(oct->pdev, + pos + PCI_SRIOV_FUNC_LINK, + &link); + link = PCI_DEVFN(PCI_SLOT(oct->pdev->devfn), link); + } + conf->ctrl_mbox_cfg.barmem_addr = (void __iomem *)oct->mmio[2].hw_addr + + CNXK_PEM_BAR4_INDEX_OFFSET + + (link * CTRL_MBOX_SZ); + + conf->fw_info.hb_interval = OCTEP_DEFAULT_FW_HB_INTERVAL; + conf->fw_info.hb_miss_count = OCTEP_DEFAULT_FW_HB_MISS_COUNT; +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_setup_iq_regs_cnxk_pf(struct octep_device *oct, int iq_no) +{ + struct octep_iq *iq = oct->iq[iq_no]; + u32 reset_instr_cnt; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_IN_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no)); + } while (!(reg_val & CNXK_R_IN_CTL_IDLE)); + } + + reg_val |= CNXK_R_IN_CTL_RDSIZE; + reg_val |= CNXK_R_IN_CTL_IS_64B; + reg_val |= CNXK_R_IN_CTL_ESR; + octep_write_csr64(oct, CNXK_SDP_R_IN_CONTROL(iq_no), reg_val); + + /* Write the start of the input queue's ring and its size */ + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_BADDR(iq_no), + iq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_RSIZE(iq_no), + iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INSTR_DBELL(iq_no); + iq->inst_cnt_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_CNTS(iq_no); + iq->intr_lvl_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_IN_INT_LEVELS(iq_no); + + /* Store the current instruction counter (used in flush_iq calculation) */ + reset_instr_cnt = readl(iq->inst_cnt_reg); + writel(reset_instr_cnt, iq->inst_cnt_reg); + + /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ + reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_setup_oq_regs_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val; + u64 oq_ctl = 0ULL; + u32 time_threshold = 0; + struct octep_oq *oq = oct->oq[oq_no]; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* wait for IDLE to set to 1 */ + if (!(reg_val & CNXK_R_OUT_CTL_IDLE)) { + do { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + } while (!(reg_val & CNXK_R_OUT_CTL_IDLE)); + } + + reg_val &= ~(CNXK_R_OUT_CTL_IMODE); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_P); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_P); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_I); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_I); + reg_val &= ~(CNXK_R_OUT_CTL_ES_I); + reg_val &= ~(CNXK_R_OUT_CTL_ROR_D); + reg_val &= ~(CNXK_R_OUT_CTL_NSR_D); + reg_val &= ~(CNXK_R_OUT_CTL_ES_D); + reg_val |= (CNXK_R_OUT_CTL_ES_P); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), reg_val); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_BADDR(oq_no), + oq->desc_ring_dma); + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_RSIZE(oq_no), + oq->max_count); + + oq_ctl = octep_read_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no)); + + /* Clear the ISIZE and BSIZE (22-0) */ + oq_ctl &= ~0x7fffffULL; + + /* Populate the BSIZE (15-0) */ + oq_ctl |= (oq->buffer_size & 0xffff); + octep_write_csr64(oct, CNXK_SDP_R_OUT_CONTROL(oq_no), oq_ctl); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + oq->pkts_sent_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_OUT_CNTS(oq_no); + oq->pkts_credit_reg = oct->mmio[0].hw_addr + + CNXK_SDP_R_OUT_SLIST_DBELL(oq_no); + + time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); + reg_val = ((u64)time_threshold << 32) | + CFG_GET_OQ_INTR_PKT(oct->conf); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); +} + +/* Setup registers for a PF mailbox */ +static void octep_setup_mbox_regs_cnxk_pf(struct octep_device *oct, int q_no) +{ + struct octep_mbox *mbox = oct->mbox[q_no]; + + mbox->q_no = q_no; + + /* PF mbox interrupt reg */ + mbox->mbox_int_reg = oct->mmio[0].hw_addr + CNXK_SDP_EPF_MBOX_RINT(0); + + /* PF to VF DATA reg. PF writes into this reg */ + mbox->mbox_write_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_PF_VF_DATA(q_no); + + /* VF to PF DATA reg. PF reads from this reg */ + mbox->mbox_read_reg = oct->mmio[0].hw_addr + CNXK_SDP_R_MBOX_VF_PF_DATA(q_no); +} + +/* Poll OEI events like heartbeat */ +static void octep_poll_oei_cnxk_pf(struct octep_device *oct) +{ + u64 reg0; + + /* Check for OEI INTR */ + reg0 = octep_read_csr64(oct, CNXK_SDP_EPF_OEI_RINT); + if (reg0) { + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT, reg0); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX) + queue_work(octep_wq, &oct->ctrl_mbox_task); + if (reg0 & CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT) + atomic_set(&oct->hb_miss_cnt, 0); + } +} + +/* OEI interrupt handler */ +static irqreturn_t octep_oei_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + + octep_poll_oei_cnxk_pf(oct); + return IRQ_HANDLED; +} + +/* Process non-ioq interrupts required to keep pf interface running. + * OEI_RINT is needed for control mailbox + * MBOX_RINT is needed for pfvf mailbox + */ +static void octep_poll_non_ioq_interrupts_cnxk_pf(struct octep_device *oct) +{ + octep_poll_oei_cnxk_pf(oct); +} + +/* Interrupt handler for input ring error interrupts. */ +static irqreturn_t octep_ire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for IRERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_IRERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "received IRERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT, reg_val); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, + CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on IQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for output ring error interrupts. */ +static irqreturn_t octep_ore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + int i = 0; + + /* Check for ORERR INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_ORERR_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received ORERR_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT, reg_val); + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { + reg_val = octep_read_csr64(oct, CNXK_SDP_R_ERR_TYPE(i)); + if (reg_val) { + dev_info(&pdev->dev, + "Received err type on OQ-%d: 0x%llx\n", + i, reg_val); + octep_write_csr64(oct, CNXK_SDP_R_ERR_TYPE(i), + reg_val); + } + } + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf input ring error interrupts. */ +static irqreturn_t octep_vfire_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFIRE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFIRE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for vf output ring error interrupts. */ +static irqreturn_t octep_vfore_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for VFORE INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received VFORE_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma related interrupts. */ +static irqreturn_t octep_dma_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + u64 reg_val = 0; + + /* Check for DMA INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_RINT); + if (reg_val) + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT, reg_val); + + return IRQ_HANDLED; +} + +/* Interrupt handler for dpi dma transaction error interrupts for VFs */ +static irqreturn_t octep_dma_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for DMA VF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received DMA_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for pp transaction error interrupts for VFs */ +static irqreturn_t octep_pp_vf_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for PPVF INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0)); + if (reg_val) { + dev_info(&pdev->dev, + "Received PP_VF_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT(0), reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupt handler for mac related interrupts. */ +static irqreturn_t octep_misc_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + u64 reg_val = 0; + + /* Check for MISC INTR */ + reg_val = octep_read_csr64(oct, CNXK_SDP_EPF_MISC_RINT); + if (reg_val) { + dev_info(&pdev->dev, + "Received MISC_RINT intr: 0x%llx\n", reg_val); + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT, reg_val); + } + return IRQ_HANDLED; +} + +/* Interrupts handler for all reserved interrupts. */ +static irqreturn_t octep_rsvd_intr_handler_cnxk_pf(void *dev) +{ + struct octep_device *oct = (struct octep_device *)dev; + struct pci_dev *pdev = oct->pdev; + + dev_info(&pdev->dev, "Reserved interrupts raised; Ignore\n"); + return IRQ_HANDLED; +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_ioq_intr_handler_cnxk_pf(void *data) +{ + struct octep_ioq_vector *vector = (struct octep_ioq_vector *)data; + struct octep_oq *oq = vector->oq; + + napi_schedule_irqoff(oq->napi); + return IRQ_HANDLED; +} + +/* soft reset */ +static int octep_soft_reset_cnxk_pf(struct octep_device *oct) +{ + dev_info(&oct->pdev->dev, "CNXKXX: Doing soft reset\n"); + + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to a hw bug, it is not. + * Set it to RUNNING right before reset so that it is not + * left in READY (1) state after a reset. This is required + * in addition to the early setting to handle the case where + * the OcteonTX is unexpectedly reset, reboots, and then + * the module is removed. + */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); + + /* Set chip domain reset bit */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_RST_CHIP_DOMAIN_W1S, 1); + /* Wait till Octeon resets. */ + mdelay(10); + /* restore the reset value */ + octep_write_csr64(oct, CNXK_SDP_WIN_WR_MASK_REG, 0xFF); + + return 0; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_reinit_regs_cnxk_pf(struct octep_device *oct) +{ + u32 i; + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_iq_regs(oct, i); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + oct->hw_ops.setup_oq_regs(oct, i); + + oct->hw_ops.enable_interrupts(oct); + oct->hw_ops.enable_io_queues(oct); + + for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) + writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); +} + +/* Enable all interrupts */ +static void octep_enable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1S, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1S, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1S, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(0), -1ULL); +} + +/* Disable all interrupts */ +static void octep_disable_interrupts_cnxk_pf(struct octep_device *oct) +{ + u64 intr_mask = 0ULL; + int srn, num_rings, i; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (i = 0; i < num_rings; i++) + intr_mask |= (0x1ULL << (srn + i)); + + octep_write_csr64(oct, CNXK_SDP_EPF_IRERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_ORERR_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_OEI_RINT_ENA_W1C, -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(0), -1ULL); + + octep_write_csr64(oct, CNXK_SDP_EPF_MISC_RINT_ENA_W1C, intr_mask); + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_RINT_ENA_W1C, intr_mask); + + octep_write_csr64(oct, CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(0), -1ULL); + octep_write_csr64(oct, CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(0), -1ULL); +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ +static u32 octep_update_iq_read_index_cnxk_pf(struct octep_iq *iq) +{ + u32 pkt_in_done = readl(iq->inst_cnt_reg); + u32 last_done, new_idx; + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + new_idx = (iq->octep_read_index + last_done) % iq->max_count; + + return new_idx; +} + +/* Enable a hardware Tx Queue */ +static void octep_enable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 loop = HZ; + u64 reg_val; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + octep_write_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); + + while (octep_read_csr64(oct, CNXK_SDP_R_IN_INSTR_DBELL(iq_no)) && + loop--) { + schedule_timeout_interruptible(1); + } + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_IN_INT_LEVELS(iq_no), reg_val); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Enable a hardware Rx Queue */ +static void octep_enable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no)); + reg_val |= (0x1ULL << 62); + octep_write_csr64(oct, CNXK_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); + + octep_write_csr64(oct, CNXK_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val |= 0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Enable all hardware Tx/Rx Queues assined to PF */ +static void octep_enable_io_queues_cnxk_pf(struct octep_device *oct) +{ + u8 q; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_enable_iq_cnxk_pf(oct, q); + octep_enable_oq_cnxk_pf(oct, q); + } +} + +/* Disable a hardware Tx Queue assined to PF */ +static void octep_disable_iq_cnxk_pf(struct octep_device *oct, int iq_no) +{ + u64 reg_val = 0ULL; + + iq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + + reg_val = octep_read_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_IN_ENABLE(iq_no), reg_val); +} + +/* Disable a hardware Rx Queue assined to PF */ +static void octep_disable_oq_cnxk_pf(struct octep_device *oct, int oq_no) +{ + u64 reg_val = 0ULL; + + oq_no += CFG_GET_PORTS_PF_SRN(oct->conf); + reg_val = octep_read_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no)); + reg_val &= ~0x1ULL; + octep_write_csr64(oct, CNXK_SDP_R_OUT_ENABLE(oq_no), reg_val); +} + +/* Disable all hardware Tx/Rx Queues assined to PF */ +static void octep_disable_io_queues_cnxk_pf(struct octep_device *oct) +{ + int q = 0; + + for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { + octep_disable_iq_cnxk_pf(oct, q); + octep_disable_oq_cnxk_pf(oct, q); + } +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_dump_registers_cnxk_pf(struct octep_device *oct) +{ + u8 srn, num_rings, q; + + srn = CFG_GET_PORTS_PF_SRN(oct->conf); + num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); + + for (q = srn; q < srn + num_rings; q++) + cnxk_dump_regs(oct, q); +} + +/** + * octep_device_setup_cnxk_pf() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - setup window access to hardware registers. + * - set initial configuration and max limits. + * - setup hardware mapping of rings to the PF device. + */ +void octep_device_setup_cnxk_pf(struct octep_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_setup_iq_regs_cnxk_pf; + oct->hw_ops.setup_oq_regs = octep_setup_oq_regs_cnxk_pf; + oct->hw_ops.setup_mbox_regs = octep_setup_mbox_regs_cnxk_pf; + + oct->hw_ops.oei_intr_handler = octep_oei_intr_handler_cnxk_pf; + oct->hw_ops.ire_intr_handler = octep_ire_intr_handler_cnxk_pf; + oct->hw_ops.ore_intr_handler = octep_ore_intr_handler_cnxk_pf; + oct->hw_ops.vfire_intr_handler = octep_vfire_intr_handler_cnxk_pf; + oct->hw_ops.vfore_intr_handler = octep_vfore_intr_handler_cnxk_pf; + oct->hw_ops.dma_intr_handler = octep_dma_intr_handler_cnxk_pf; + oct->hw_ops.dma_vf_intr_handler = octep_dma_vf_intr_handler_cnxk_pf; + oct->hw_ops.pp_vf_intr_handler = octep_pp_vf_intr_handler_cnxk_pf; + oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cnxk_pf; + oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cnxk_pf; + oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cnxk_pf; + oct->hw_ops.soft_reset = octep_soft_reset_cnxk_pf; + oct->hw_ops.reinit_regs = octep_reinit_regs_cnxk_pf; + + oct->hw_ops.enable_interrupts = octep_enable_interrupts_cnxk_pf; + oct->hw_ops.disable_interrupts = octep_disable_interrupts_cnxk_pf; + oct->hw_ops.poll_non_ioq_interrupts = octep_poll_non_ioq_interrupts_cnxk_pf; + + oct->hw_ops.update_iq_read_idx = octep_update_iq_read_index_cnxk_pf; + + oct->hw_ops.enable_iq = octep_enable_iq_cnxk_pf; + oct->hw_ops.enable_oq = octep_enable_oq_cnxk_pf; + oct->hw_ops.enable_io_queues = octep_enable_io_queues_cnxk_pf; + + oct->hw_ops.disable_iq = octep_disable_iq_cnxk_pf; + oct->hw_ops.disable_oq = octep_disable_oq_cnxk_pf; + oct->hw_ops.disable_io_queues = octep_disable_io_queues_cnxk_pf; + oct->hw_ops.reset_io_queues = octep_reset_io_queues_cnxk_pf; + + oct->hw_ops.dump_registers = octep_dump_registers_cnxk_pf; + + octep_setup_pci_window_regs_cnxk_pf(oct); + + oct->pcie_port = octep_read_csr64(oct, CNXK_SDP_MAC_NUMBER) & 0xff; + dev_info(&oct->pdev->dev, + "Octeon device using PCIE Port %d\n", oct->pcie_port); + + octep_init_config_cnxk_pf(oct); + octep_configure_ring_mapping_cnxk_pf(oct); + + /* Firmware status CSR is supposed to be cleared by + * core domain reset, but due to IPBUPEM-38842, it is not. + * Set it to RUNNING early in boot, so that unexpected resets + * leave it in a state that is not READY (1). + */ + OCTEP_PCI_WIN_WRITE(oct, CNXK_PEMX_PFX_CSX_PFCFGX(0, 0, CNXK_PCIEEP_VSECST_CTL), + FW_STATUS_RUNNING); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 974a34be9ffa..2da00a701df2 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -24,6 +24,10 @@ struct workqueue_struct *octep_wq; static const struct pci_device_id octep_pci_id_tbl[] = { {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_PF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_PF)}, {0, }, }; MODULE_DEVICE_TABLE(pci, octep_pci_id_tbl); @@ -1147,6 +1151,14 @@ static const char *octep_devid_to_str(struct octep_device *oct) return "CN93XX"; case OCTEP_PCI_DEVICE_ID_CNF95N_PF: return "CNF95N"; + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + return "CN10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + return "CNF10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + return "CNF10KB"; + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + return "CN10KB"; default: return "Unsupported"; } @@ -1192,6 +1204,14 @@ int octep_device_setup(struct octep_device *oct) OCTEP_MINOR_REV(oct)); octep_device_setup_cn93_pf(oct); break; + case OCTEP_PCI_DEVICE_ID_CNF10KA_PF: + case OCTEP_PCI_DEVICE_ID_CN10KA_PF: + case OCTEP_PCI_DEVICE_ID_CNF10KB_PF: + case OCTEP_PCI_DEVICE_ID_CN10KB_PF: + dev_info(&pdev->dev, "Setting up OCTEON %s PF PASS%d.%d\n", + octep_devid_to_str(oct), OCTEP_MAJOR_REV(oct), OCTEP_MINOR_REV(oct)); + octep_device_setup_cnxk_pf(oct); + break; default: dev_err(&pdev->dev, "%s: unsupported device\n", __func__); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index c33e046b69a4..e2fe8b28eb0e 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -23,6 +23,11 @@ #define OCTEP_PCI_DEVICE_ID_CNF95N_PF 0xB400 //95N PF +#define OCTEP_PCI_DEVICE_ID_CN10KA_PF 0xB900 //CN10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KA_PF 0xBA00 //CNF10KA PF +#define OCTEP_PCI_DEVICE_ID_CNF10KB_PF 0xBC00 //CNF10KB PF +#define OCTEP_PCI_DEVICE_ID_CN10KB_PF 0xBD00 //CN10KB PF + #define OCTEP_MAX_QUEUES 63 #define OCTEP_MAX_IQ OCTEP_MAX_QUEUES #define OCTEP_MAX_OQ OCTEP_MAX_QUEUES @@ -386,6 +391,7 @@ int octep_setup_oqs(struct octep_device *oct); void octep_free_oqs(struct octep_device *oct); void octep_oq_dbell_init(struct octep_device *oct); void octep_device_setup_cn93_pf(struct octep_device *oct); +void octep_device_setup_cnxk_pf(struct octep_device *oct); int octep_iq_process_completions(struct octep_iq *iq, u16 budget); int octep_oq_process_rx(struct octep_oq *oq, int budget); void octep_set_ethtool_ops(struct net_device *netdev); diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h new file mode 100644 index 000000000000..abe02df8af11 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cnxk_pf.h @@ -0,0 +1,400 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_REGS_CNXK_PF_H_ +#define _OCTEP_REGS_CNXK_PF_H_ + +/* ############################ RST ######################### */ +#define CNXK_RST_BOOT 0x000087E006001600ULL +#define CNXK_RST_CHIP_DOMAIN_W1S 0x000087E006001810ULL +#define CNXK_RST_CORE_DOMAIN_W1S 0x000087E006001820ULL +#define CNXK_RST_CORE_DOMAIN_W1C 0x000087E006001828ULL + +#define CNXK_CONFIG_XPANSION_BAR 0x38 +#define CNXK_CONFIG_PCIE_CAP 0x70 +#define CNXK_CONFIG_PCIE_DEVCAP 0x74 +#define CNXK_CONFIG_PCIE_DEVCTL 0x78 +#define CNXK_CONFIG_PCIE_LINKCAP 0x7C +#define CNXK_CONFIG_PCIE_LINKCTL 0x80 +#define CNXK_CONFIG_PCIE_SLOTCAP 0x84 +#define CNXK_CONFIG_PCIE_SLOTCTL 0x88 + +#define CNXK_PCIE_SRIOV_FDL 0x188 /* 0x98 */ +#define CNXK_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CNXK_PCIE_SRIOV_FDL_MASK 0xFF + +#define CNXK_CONFIG_PCIE_FLTMSK 0x720 + +/* ################# Offsets of RING, EPF, MAC ######################### */ +#define CNXK_RING_OFFSET (0x1ULL << 17) +#define CNXK_EPF_OFFSET (0x1ULL << 25) +#define CNXK_MAC_OFFSET (0x1ULL << 4) +#define CNXK_BIT_ARRAY_OFFSET (0x1ULL << 4) +#define CNXK_EPVF_RING_OFFSET (0x1ULL << 4) + +/* ################# Scratch Registers ######################### */ +#define CNXK_SDP_EPF_SCRATCH 0x209E0 + +/* ################# Window Registers ######################### */ +#define CNXK_SDP_WIN_WR_ADDR64 0x20000 +#define CNXK_SDP_WIN_RD_ADDR64 0x20010 +#define CNXK_SDP_WIN_WR_DATA64 0x20020 +#define CNXK_SDP_WIN_WR_MASK_REG 0x20030 +#define CNXK_SDP_WIN_RD_DATA64 0x20040 + +#define CNXK_SDP_MAC_NUMBER 0x2C100 + +/* ################# Global Previliged registers ######################### */ +#define CNXK_SDP_EPF_RINFO 0x209F0 + +#define CNXK_SDP_EPF_RINFO_SRN(val) ((val) & 0x7F) +#define CNXK_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF) +#define CNXK_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0x7F) + +/* SDP Function select */ +#define CNXK_SDP_FUNC_SEL_EPF_BIT_POS 7 +#define CNXK_SDP_FUNC_SEL_FUNC_BIT_POS 0 + +/* ##### RING IN (Into device from PCI: Tx Ring) REGISTERS #### */ +#define CNXK_SDP_R_IN_CONTROL_START 0x10000 +#define CNXK_SDP_R_IN_ENABLE_START 0x10010 +#define CNXK_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CNXK_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CNXK_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CNXK_SDP_R_IN_CNTS_START 0x10050 +#define CNXK_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CNXK_SDP_R_IN_PKT_CNT_START 0x10080 +#define CNXK_SDP_R_IN_BYTE_CNT_START 0x10090 + +#define CNXK_SDP_R_IN_CONTROL(ring) \ + (CNXK_SDP_R_IN_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_ENABLE(ring) \ + (CNXK_SDP_R_IN_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_BADDR(ring) \ + (CNXK_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_RSIZE(ring) \ + (CNXK_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INSTR_DBELL(ring) \ + (CNXK_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS(ring) \ + (CNXK_SDP_R_IN_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_LEVELS(ring) \ + (CNXK_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_PKT_CNT(ring) \ + (CNXK_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_BYTE_CNT(ring) \ + (CNXK_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/* Rings per Virtual Function */ +#define CNXK_R_IN_CTL_RPVF_MASK (0xF) +#define CNXK_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + */ +#define CNXK_R_IN_CTL_IDLE (0x1ULL << 28) +#define CNXK_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CNXK_R_IN_CTL_IS_64B (0x1ULL << 24) +#define CNXK_R_IN_CTL_D_NSR (0x1ULL << 8) +#define CNXK_R_IN_CTL_D_ESR (0x1ULL << 6) +#define CNXK_R_IN_CTL_D_ROR (0x1ULL << 5) +#define CNXK_R_IN_CTL_NSR (0x1ULL << 3) +#define CNXK_R_IN_CTL_ESR (0x1ULL << 1) +#define CNXK_R_IN_CTL_ROR (0x1ULL << 0) + +#define CNXK_R_IN_CTL_MASK (CNXK_R_IN_CTL_RDSIZE | CNXK_R_IN_CTL_IS_64B) + +/* ##### RING OUT (out from device to PCI host: Rx Ring) REGISTERS #### */ +#define CNXK_SDP_R_OUT_CNTS_START 0x10100 +#define CNXK_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CNXK_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CNXK_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CNXK_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CNXK_SDP_R_OUT_CONTROL_START 0x10150 +#define CNXK_SDP_R_OUT_WMARK_START 0x10160 +#define CNXK_SDP_R_OUT_ENABLE_START 0x10170 +#define CNXK_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CNXK_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CNXK_SDP_R_OUT_CONTROL(ring) \ + (CNXK_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_ENABLE(ring) \ + (CNXK_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_BADDR(ring) \ + (CNXK_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CNXK_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_SLIST_DBELL(ring) \ + (CNXK_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS(ring) \ + (CNXK_SDP_R_OUT_CNTS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_LEVELS(ring) \ + (CNXK_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_PKT_CNT(ring) \ + (CNXK_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_BYTE_CNT(ring) \ + (CNXK_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CNXK_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CNXK_R_OUT_INT_LEVELS_TIMET (32) + +#define CNXK_R_OUT_CTL_IDLE BIT_ULL(40) +#define CNXK_R_OUT_CTL_ES_I BIT_ULL(34) +#define CNXK_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CNXK_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CNXK_R_OUT_CTL_ES_D BIT_ULL(30) +#define CNXK_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CNXK_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CNXK_R_OUT_CTL_ES_P BIT_ULL(26) +#define CNXK_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CNXK_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CNXK_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ############### Interrupt Moderation Registers ############### */ +#define CNXK_SDP_R_IN_INT_MDRT_CTL0_START 0x10280 +#define CNXK_SDP_R_IN_INT_MDRT_CTL1_START 0x102A0 +#define CNXK_SDP_R_IN_INT_MDRT_DBG_START 0x102C0 + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0_START 0x10380 +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1_START 0x103A0 +#define CNXK_SDP_R_OUT_INT_MDRT_DBG_START 0x103C0 + +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_IN_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_IN_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL0(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL0_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_CTL1(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_CTL1_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_INT_MDRT_DBG(ring) \ + (CNXK_SDP_R_OUT_INT_MDRT_DBG_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Mail Box Registers ########################## */ +/* INT register for VF. when a MBOX write from PF happed to a VF, + * corresponding bit will be set in this register as well as in + * PF_VF_INT register. + * + * This is a RO register, the int can be cleared by writing 1 to PF_VF_INT + */ +/* Basically first 3 are from PF to VF. The last one is data from VF to PF */ +#define CNXK_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +#define CNXK_SDP_R_MBOX_PF_VF_INT_START 0x10220 +#define CNXK_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CNXK_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_PF_VF_INT(ring) \ + (CNXK_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CNXK_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_RING_OFFSET)) + +/* ##################### Interrupt Registers ########################## */ +#define CNXK_SDP_R_ERR_TYPE_START 0x10400 + +#define CNXK_SDP_R_ERR_TYPE(ring) \ + (CNXK_SDP_R_ERR_TYPE_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_MBOX_ISM_START 0x10500 +#define CNXK_SDP_R_OUT_CNTS_ISM_START 0x10510 +#define CNXK_SDP_R_IN_CNTS_ISM_START 0x10520 + +#define CNXK_SDP_R_MBOX_ISM(ring) \ + (CNXK_SDP_R_MBOX_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_OUT_CNTS_ISM(ring) \ + (CNXK_SDP_R_OUT_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_R_IN_CNTS_ISM(ring) \ + (CNXK_SDP_R_IN_CNTS_ISM_START + ((ring) * CNXK_RING_OFFSET)) + +#define CNXK_SDP_EPF_MBOX_RINT_START 0x20100 +#define CNXK_SDP_EPF_MBOX_RINT_W1S_START 0x20120 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START 0x20140 +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START 0x20160 + +#define CNXK_SDP_EPF_VFIRE_RINT_START 0x20180 +#define CNXK_SDP_EPF_VFIRE_RINT_W1S_START 0x201A0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START 0x201C0 +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START 0x201E0 + +#define CNXK_SDP_EPF_IRERR_RINT 0x20200 +#define CNXK_SDP_EPF_IRERR_RINT_W1S 0x20210 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1C 0x20220 +#define CNXK_SDP_EPF_IRERR_RINT_ENA_W1S 0x20230 + +#define CNXK_SDP_EPF_VFORE_RINT_START 0x20240 +#define CNXK_SDP_EPF_VFORE_RINT_W1S_START 0x20260 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START 0x20280 +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START 0x202A0 + +#define CNXK_SDP_EPF_ORERR_RINT 0x20320 +#define CNXK_SDP_EPF_ORERR_RINT_W1S 0x20330 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1C 0x20340 +#define CNXK_SDP_EPF_ORERR_RINT_ENA_W1S 0x20350 + +#define CNXK_SDP_EPF_OEI_RINT 0x20400 +#define CNXK_SDP_EPF_OEI_RINT_W1S 0x20500 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1C 0x20600 +#define CNXK_SDP_EPF_OEI_RINT_ENA_W1S 0x20700 + +#define CNXK_SDP_EPF_DMA_RINT 0x20800 +#define CNXK_SDP_EPF_DMA_RINT_W1S 0x20810 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1C 0x20820 +#define CNXK_SDP_EPF_DMA_RINT_ENA_W1S 0x20830 + +#define CNXK_SDP_EPF_DMA_INT_LEVEL_START 0x20840 +#define CNXK_SDP_EPF_DMA_CNT_START 0x20860 +#define CNXK_SDP_EPF_DMA_TIM_START 0x20880 + +#define CNXK_SDP_EPF_MISC_RINT 0x208A0 +#define CNXK_SDP_EPF_MISC_RINT_W1S 0x208B0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1C 0x208C0 +#define CNXK_SDP_EPF_MISC_RINT_ENA_W1S 0x208D0 + +#define CNXK_SDP_EPF_DMA_VF_RINT_START 0x208E0 +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S_START 0x20900 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START 0x20920 +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START 0x20940 + +#define CNXK_SDP_EPF_PP_VF_RINT_START 0x20960 +#define CNXK_SDP_EPF_PP_VF_RINT_W1S_START 0x20980 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START 0x209A0 +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START 0x209C0 + +#define CNXK_SDP_EPF_MBOX_RINT(index) \ + (CNXK_SDP_EPF_MBOX_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_MBOX_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_MBOX_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFIRE_RINT(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFIRE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_VFORE_RINT(index) \ + (CNXK_SDP_EPF_VFORE_RINT_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1C_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_VFORE_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_VFORE_RINT_ENA_W1S_START + ((index) * CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_DMA_VF_RINT(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_DMA_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +#define CNXK_SDP_EPF_PP_VF_RINT(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1C_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) +#define CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S(index) \ + (CNXK_SDP_EPF_PP_VF_RINT_ENA_W1S_START + ((index) + CNXK_BIT_ARRAY_OFFSET)) + +/*------------------ Interrupt Masks ----------------*/ +#define CNXK_INTR_R_SEND_ISM BIT_ULL(63) +#define CNXK_INTR_R_OUT_INT BIT_ULL(62) +#define CNXK_INTR_R_IN_INT BIT_ULL(61) +#define CNXK_INTR_R_MBOX_INT BIT_ULL(60) +#define CNXK_INTR_R_RESEND BIT_ULL(59) +#define CNXK_INTR_R_CLR_TIM BIT_ULL(58) + +/* ####################### Ring Mapping Registers ################################## */ +#define CNXK_SDP_EPVF_RING_START 0x26000 +#define CNXK_SDP_IN_RING_TB_MAP_START 0x28000 +#define CNXK_SDP_IN_RATE_LIMIT_START 0x2A000 +#define CNXK_SDP_MAC_PF_RING_CTL_START 0x2C000 + +#define CNXK_SDP_EPVF_RING(ring) \ + (CNXK_SDP_EPVF_RING_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RING_TB_MAP(ring) \ + (CNXK_SDP_N_RING_TB_MAP_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_IN_RATE_LIMIT(ring) \ + (CNXK_SDP_IN_RATE_LIMIT_START + ((ring) * CNXK_EPVF_RING_OFFSET)) +#define CNXK_SDP_MAC_PF_RING_CTL(mac) \ + (CNXK_SDP_MAC_PF_RING_CTL_START + ((mac) * CNXK_MAC_OFFSET)) + +#define CNXK_SDP_MAC_PF_RING_CTL_NPFS(val) ((val) & 0x3) +#define CNXK_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0x7F) +#define CNXK_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F) + +/* Number of non-queue interrupts in CNXKxx */ +#define CNXK_NUM_NON_IOQ_INTR 32 + +/* bit 0 for control mbox interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_MBOX BIT_ULL(0) +/* bit 1 for firmware heartbeat interrupt */ +#define CNXK_SDP_EPF_OEI_RINT_DATA_BIT_HBEAT BIT_ULL(1) +#define FW_STATUS_RUNNING 2ULL +#define CNXK_PEMX_PFX_CSX_PFCFGX(pem, pf, offset) ({ typeof(offset) _off = (offset); \ + ((0x8e0000008000 | \ + (uint64_t)(pem) << 36 \ + | (pf) << 18 \ + | ((_off >> 16) & 1) << 16 \ + | (_off >> 3) << 3) \ + + (((_off >> 2) & 1) << 2)); \ + }) + +/* Register defines for use with CNXK_PEMX_PFX_CSX_PFCFGX */ +#define CNXK_PCIEEP_VSECST_CTL 0x418 + +#define CNXK_PEM_BAR4_INDEX 7 +#define CNXK_PEM_BAR4_INDEX_SIZE 0x400000ULL +#define CNXK_PEM_BAR4_INDEX_OFFSET (CNXK_PEM_BAR4_INDEX * CNXK_PEM_BAR4_INDEX_SIZE) + +#endif /* _OCTEP_REGS_CNXK_PF_H_ */ -- cgit v1.3.1 From c5b9f4792ea6b9abfcfb9486ba256f55e296aaa7 Mon Sep 17 00:00:00 2001 From: Niklas Söderlund Date: Tue, 21 Nov 2023 19:37:38 +0100 Subject: dt-bindings: net: renesas,ethertsn: Add Ethernet TSN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add bindings for Renesas R-Car Ethernet TSN End-station IP. The RTSN device provides Ethernet network. Signed-off-by: Niklas Söderlund Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231121183738.656192-1-niklas.soderlund+renesas@ragnatech.se Signed-off-by: Paolo Abeni --- .../devicetree/bindings/net/renesas,ethertsn.yaml | 133 +++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/renesas,ethertsn.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml new file mode 100644 index 000000000000..475aff7714d6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/renesas,ethertsn.yaml @@ -0,0 +1,133 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/renesas,ethertsn.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas Ethernet TSN End-station + +maintainers: + - Niklas Söderlund + +description: + The RTSN device provides Ethernet network using a 10 Mbps, 100 Mbps, or 1 + Gbps full-duplex link via MII/GMII/RMII/RGMII. Depending on the connected PHY. + +allOf: + - $ref: ethernet-controller.yaml# + +properties: + compatible: + items: + - enum: + - renesas,r8a779g0-ethertsn # R-Car V4H + - const: renesas,rcar-gen4-ethertsn + + reg: + items: + - description: TSN End Station target + - description: generalized Precision Time Protocol target + + reg-names: + items: + - const: tsnes + - const: gptp + + interrupts: + items: + - description: TX data interrupt + - description: RX data interrupt + + interrupt-names: + items: + - const: tx + - const: rx + + clocks: + maxItems: 1 + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + phy-mode: + contains: + enum: + - mii + - rgmii + + phy-handle: + $ref: /schemas/types.yaml#/definitions/phandle + description: + Specifies a reference to a node representing a PHY device. + + rx-internal-delay-ps: + enum: [0, 1800] + + tx-internal-delay-ps: + enum: [0, 2000] + + '#address-cells': + const: 1 + + '#size-cells': + const: 0 + +patternProperties: + "^ethernet-phy@[0-9a-f]$": + type: object + $ref: ethernet-phy.yaml# + unevaluatedProperties: false + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - clocks + - power-domains + - resets + - phy-mode + - phy-handle + - '#address-cells' + - '#size-cells' + +additionalProperties: false + +examples: + - | + #include + #include + #include + #include + + tsn0: ethernet@e6460000 { + compatible = "renesas,r8a779g0-ethertsn", "renesas,rcar-gen4-ethertsn"; + reg = <0xe6460000 0x7000>, + <0xe6449000 0x500>; + reg-names = "tsnes", "gptp"; + interrupts = , + ; + interrupt-names = "tx", "rx"; + clocks = <&cpg CPG_MOD 2723>; + power-domains = <&sysc R8A779G0_PD_ALWAYS_ON>; + resets = <&cpg 2723>; + + phy-mode = "rgmii"; + tx-internal-delay-ps = <2000>; + phy-handle = <&phy3>; + + #address-cells = <1>; + #size-cells = <0>; + + phy3: ethernet-phy@3 { + compatible = "ethernet-phy-ieee802.3-c45"; + reg = <0>; + interrupt-parent = <&gpio4>; + interrupts = <3 IRQ_TYPE_LEVEL_LOW>; + reset-gpios = <&gpio1 23 GPIO_ACTIVE_LOW>; + }; + }; -- cgit v1.3.1 From f061c9f7d058ffc32de66f2efb3e1c368e305423 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Tue, 21 Nov 2023 03:48:31 -0800 Subject: Documentation: Document each netlink family This is a simple script that parses the Netlink YAML spec files (Documentation/netlink/specs/), and generates RST files to be rendered in the Network -> Netlink Specification documentation page. Create a python script that is invoked during 'make htmldocs', reads the YAML specs input file and generate the correspondent RST file. Create a new Documentation/networking/netlink_spec index page, and reference each Netlink RST file that was processed above in this main index.rst file. In case of any exception during the parsing, dump the error and skip the file. Do not regenerate the RST files if the input files (YAML) were not changed in-between invocations. Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao ---- Changelog: V3: * Do not regenerate the RST files if the input files were not changed. In order to do it, a few things changed: - Rely on Makefile more to find what changed, and trigger individual file processing - The script parses file by file now (instead of batches) - Create a new option to generate the index file V2: * Moved the logic from a sphinx extension to a external script * Adjust some formatting as suggested by Donald Hunter and Jakub * Auto generating all the rsts instead of having stubs * Handling error gracefully Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/Makefile | 16 +- Documentation/networking/index.rst | 1 + Documentation/networking/netlink_spec/.gitignore | 1 + Documentation/networking/netlink_spec/readme.txt | 4 + tools/net/ynl/ynl-gen-rst.py | 388 +++++++++++++++++++++++ 5 files changed, 409 insertions(+), 1 deletion(-) create mode 100644 Documentation/networking/netlink_spec/.gitignore create mode 100644 Documentation/networking/netlink_spec/readme.txt create mode 100755 tools/net/ynl/ynl-gen-rst.py (limited to 'Documentation') diff --git a/Documentation/Makefile b/Documentation/Makefile index 2f35793acd2a..5c156fbb6cdf 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -97,7 +97,21 @@ quiet_cmd_sphinx = SPHINX $@ --> file://$(abspath $(BUILDDIR)/$3/$4) cp $(if $(patsubst /%,,$(DOCS_CSS)),$(abspath $(srctree)/$(DOCS_CSS)),$(DOCS_CSS)) $(BUILDDIR)/$3/_static/; \ fi -htmldocs: +YNL_INDEX:=$(srctree)/Documentation/networking/netlink_spec/index.rst +YNL_RST_DIR:=$(srctree)/Documentation/networking/netlink_spec +YNL_YAML_DIR:=$(srctree)/Documentation/netlink/specs +YNL_TOOL:=$(srctree)/tools/net/ynl/ynl-gen-rst.py + +YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml)) +YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP)) + +$(YNL_INDEX): $(YNL_RST_FILES) + @$(YNL_TOOL) -o $@ -x + +$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml + @$(YNL_TOOL) -i $< -o $@ + +htmldocs: $(YNL_INDEX) @$(srctree)/scripts/sphinx-pre-install --version-check @+$(foreach var,$(SPHINXDIRS),$(call loop_cmd,sphinx,html,$(var),,$(var))) diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 683eb42309cc..cb435c141794 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -55,6 +55,7 @@ Contents: filter generic-hdlc generic_netlink + netlink_spec/index gen_stats gtp ila diff --git a/Documentation/networking/netlink_spec/.gitignore b/Documentation/networking/netlink_spec/.gitignore new file mode 100644 index 000000000000..30d85567b592 --- /dev/null +++ b/Documentation/networking/netlink_spec/.gitignore @@ -0,0 +1 @@ +*.rst diff --git a/Documentation/networking/netlink_spec/readme.txt b/Documentation/networking/netlink_spec/readme.txt new file mode 100644 index 000000000000..6763f99d216c --- /dev/null +++ b/Documentation/networking/netlink_spec/readme.txt @@ -0,0 +1,4 @@ +SPDX-License-Identifier: GPL-2.0 + +This file is populated during the build of the documentation (htmldocs) by the +tools/net/ynl/ynl-gen-rst.py script. diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py new file mode 100755 index 000000000000..b6292109e236 --- /dev/null +++ b/tools/net/ynl/ynl-gen-rst.py @@ -0,0 +1,388 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# -*- coding: utf-8; mode: python -*- + +""" + Script to auto generate the documentation for Netlink specifications. + + :copyright: Copyright (C) 2023 Breno Leitao + :license: GPL Version 2, June 1991 see linux/COPYING for details. + + This script performs extensive parsing to the Linux kernel's netlink YAML + spec files, in an effort to avoid needing to heavily mark up the original + YAML file. + + This code is split in three big parts: + 1) RST formatters: Use to convert a string to a RST output + 2) Parser helpers: Functions to parse the YAML data structure + 3) Main function and small helpers +""" + +from typing import Any, Dict, List +import os.path +import sys +import argparse +import logging +import yaml + + +SPACE_PER_LEVEL = 4 + + +# RST Formatters +# ============== +def headroom(level: int) -> str: + """Return space to format""" + return " " * (level * SPACE_PER_LEVEL) + + +def bold(text: str) -> str: + """Format bold text""" + return f"**{text}**" + + +def inline(text: str) -> str: + """Format inline text""" + return f"``{text}``" + + +def sanitize(text: str) -> str: + """Remove newlines and multiple spaces""" + # This is useful for some fields that are spread across multiple lines + return str(text).replace("\n", "").strip() + + +def rst_fields(key: str, value: str, level: int = 0) -> str: + """Return a RST formatted field""" + return headroom(level) + f":{key}: {value}" + + +def rst_definition(key: str, value: Any, level: int = 0) -> str: + """Format a single rst definition""" + return headroom(level) + key + "\n" + headroom(level + 1) + str(value) + + +def rst_paragraph(paragraph: str, level: int = 0) -> str: + """Return a formatted paragraph""" + return headroom(level) + paragraph + + +def rst_bullet(item: str, level: int = 0) -> str: + """Return a formatted a bullet""" + return headroom(level) + f" - {item}" + + +def rst_subsection(title: str) -> str: + """Add a sub-section to the document""" + return f"{title}\n" + "-" * len(title) + + +def rst_subsubsection(title: str) -> str: + """Add a sub-sub-section to the document""" + return f"{title}\n" + "~" * len(title) + + +def rst_section(title: str) -> str: + """Add a section to the document""" + return f"\n{title}\n" + "=" * len(title) + + +def rst_subtitle(title: str) -> str: + """Add a subtitle to the document""" + return "\n" + "-" * len(title) + f"\n{title}\n" + "-" * len(title) + "\n\n" + + +def rst_title(title: str) -> str: + """Add a title to the document""" + return "=" * len(title) + f"\n{title}\n" + "=" * len(title) + "\n\n" + + +def rst_list_inline(list_: List[str], level: int = 0) -> str: + """Format a list using inlines""" + return headroom(level) + "[" + ", ".join(inline(i) for i in list_) + "]" + + +def rst_header() -> str: + """The headers for all the auto generated RST files""" + lines = [] + + lines.append(rst_paragraph(".. SPDX-License-Identifier: GPL-2.0")) + lines.append(rst_paragraph(".. NOTE: This document was auto-generated.\n\n")) + + return "\n".join(lines) + + +def rst_toctree(maxdepth: int = 2) -> str: + """Generate a toctree RST primitive""" + lines = [] + + lines.append(".. toctree::") + lines.append(f" :maxdepth: {maxdepth}\n\n") + + return "\n".join(lines) + + +# Parsers +# ======= + + +def parse_mcast_group(mcast_group: List[Dict[str, Any]]) -> str: + """Parse 'multicast' group list and return a formatted string""" + lines = [] + for group in mcast_group: + lines.append(rst_bullet(group["name"])) + + return "\n".join(lines) + + +def parse_do(do_dict: Dict[str, Any], level: int = 0) -> str: + """Parse 'do' section and return a formatted string""" + lines = [] + for key in do_dict.keys(): + lines.append(rst_paragraph(bold(key), level + 1)) + lines.append(parse_do_attributes(do_dict[key], level + 1) + "\n") + + return "\n".join(lines) + + +def parse_do_attributes(attrs: Dict[str, Any], level: int = 0) -> str: + """Parse 'attributes' section""" + if "attributes" not in attrs: + return "" + lines = [rst_fields("attributes", rst_list_inline(attrs["attributes"]), level + 1)] + + return "\n".join(lines) + + +def parse_operations(operations: List[Dict[str, Any]]) -> str: + """Parse operations block""" + preprocessed = ["name", "doc", "title", "do", "dump"] + lines = [] + + for operation in operations: + lines.append(rst_section(operation["name"])) + lines.append(rst_paragraph(sanitize(operation["doc"])) + "\n") + + for key in operation.keys(): + if key in preprocessed: + # Skip the special fields + continue + lines.append(rst_fields(key, operation[key], 0)) + + if "do" in operation: + lines.append(rst_paragraph(":do:", 0)) + lines.append(parse_do(operation["do"], 0)) + if "dump" in operation: + lines.append(rst_paragraph(":dump:", 0)) + lines.append(parse_do(operation["dump"], 0)) + + # New line after fields + lines.append("\n") + + return "\n".join(lines) + + +def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: + """Parse a list of entries""" + lines = [] + for entry in entries: + if isinstance(entry, dict): + # entries could be a list or a dictionary + lines.append( + rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level) + ) + elif isinstance(entry, list): + lines.append(rst_list_inline(entry, level)) + else: + lines.append(rst_bullet(inline(sanitize(entry)), level)) + + lines.append("\n") + return "\n".join(lines) + + +def parse_definitions(defs: Dict[str, Any]) -> str: + """Parse definitions section""" + preprocessed = ["name", "entries", "members"] + ignored = ["render-max"] # This is not printed + lines = [] + + for definition in defs: + lines.append(rst_section(definition["name"])) + for k in definition.keys(): + if k in preprocessed + ignored: + continue + lines.append(rst_fields(k, sanitize(definition[k]), 0)) + + # Field list needs to finish with a new line + lines.append("\n") + if "entries" in definition: + lines.append(rst_paragraph(":entries:", 0)) + lines.append(parse_entries(definition["entries"], 1)) + if "members" in definition: + lines.append(rst_paragraph(":members:", 0)) + lines.append(parse_entries(definition["members"], 1)) + + return "\n".join(lines) + + +def parse_attr_sets(entries: List[Dict[str, Any]]) -> str: + """Parse attribute from attribute-set""" + preprocessed = ["name", "type"] + ignored = ["checks"] + lines = [] + + for entry in entries: + lines.append(rst_section(entry["name"])) + for attr in entry["attributes"]: + type_ = attr.get("type") + attr_line = bold(attr["name"]) + if type_: + # Add the attribute type in the same line + attr_line += f" ({inline(type_)})" + + lines.append(rst_subsubsection(attr_line)) + + for k in attr.keys(): + if k in preprocessed + ignored: + continue + lines.append(rst_fields(k, sanitize(attr[k]), 2)) + lines.append("\n") + + return "\n".join(lines) + + +def parse_yaml(obj: Dict[str, Any]) -> str: + """Format the whole YAML into a RST string""" + lines = [] + + # Main header + + lines.append(rst_header()) + + title = f"Family ``{obj['name']}`` netlink specification" + lines.append(rst_title(title)) + lines.append(rst_paragraph(".. contents::\n")) + + if "doc" in obj: + lines.append(rst_subtitle("Summary")) + lines.append(rst_paragraph(obj["doc"], 0)) + + # Operations + if "operations" in obj: + lines.append(rst_subtitle("Operations")) + lines.append(parse_operations(obj["operations"]["list"])) + + # Multicast groups + if "mcast-groups" in obj: + lines.append(rst_subtitle("Multicast groups")) + lines.append(parse_mcast_group(obj["mcast-groups"]["list"])) + + # Definitions + if "definitions" in obj: + lines.append(rst_subtitle("Definitions")) + lines.append(parse_definitions(obj["definitions"])) + + # Attributes set + if "attribute-sets" in obj: + lines.append(rst_subtitle("Attribute sets")) + lines.append(parse_attr_sets(obj["attribute-sets"])) + + return "\n".join(lines) + + +# Main functions +# ============== + + +def parse_arguments() -> argparse.Namespace: + """Parse arguments from user""" + parser = argparse.ArgumentParser(description="Netlink RST generator") + + parser.add_argument("-v", "--verbose", action="store_true") + parser.add_argument("-o", "--output", help="Output file name") + + # Index and input are mutually exclusive + group = parser.add_mutually_exclusive_group() + group.add_argument( + "-x", "--index", action="store_true", help="Generate the index page" + ) + group.add_argument("-i", "--input", help="YAML file name") + + args = parser.parse_args() + + if args.verbose: + logging.basicConfig(level=logging.DEBUG) + + if args.input and not os.path.isfile(args.input): + logging.warning("%s is not a valid file.", args.input) + sys.exit(-1) + + if not args.output: + logging.error("No output file specified.") + sys.exit(-1) + + if os.path.isfile(args.output): + logging.debug("%s already exists. Overwriting it.", args.output) + + return args + + +def parse_yaml_file(filename: str) -> str: + """Transform the YAML specified by filename into a rst-formmated string""" + with open(filename, "r", encoding="utf-8") as spec_file: + yaml_data = yaml.safe_load(spec_file) + content = parse_yaml(yaml_data) + + return content + + +def write_to_rstfile(content: str, filename: str) -> None: + """Write the generated content into an RST file""" + logging.debug("Saving RST file to %s", filename) + + with open(filename, "w", encoding="utf-8") as rst_file: + rst_file.write(content) + + +def generate_main_index_rst(output: str) -> None: + """Generate the `networking_spec/index` content and write to the file""" + lines = [] + + lines.append(rst_header()) + lines.append(rst_title("Netlink Specification")) + lines.append(rst_toctree(1)) + + index_dir = os.path.dirname(output) + logging.debug("Looking for .rst files in %s", index_dir) + for filename in os.listdir(index_dir): + if not filename.endswith(".rst") or filename == "index.rst": + continue + lines.append(f" {filename.replace('.rst', '')}\n") + + logging.debug("Writing an index file at %s", output) + write_to_rstfile("".join(lines), output) + + +def main() -> None: + """Main function that reads the YAML files and generates the RST files""" + + args = parse_arguments() + + if args.input: + logging.debug("Parsing %s", args.input) + try: + content = parse_yaml_file(os.path.join(args.input)) + except Exception as exception: + logging.warning("Failed to parse %s.", args.input) + logging.warning(exception) + sys.exit(-1) + + write_to_rstfile(content, args.output) + + if args.index: + # Generate the index RST file + generate_main_index_rst(args.output) + + +if __name__ == "__main__": + main() -- cgit v1.3.1 From f8e80fc4acebab0447567e77d6abc30fb44250cc Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Wed, 22 Nov 2023 21:52:57 +0800 Subject: net/smc: add sysctl for max links per lgr for SMC-R v2.1 Add a new sysctl: net.smc.smcr_max_links_per_lgr, which is used to control the preferred max links per lgr for SMC-R v2.1. The default value of this sysctl is 2, and the acceptable value ranges from 1 to 2. Signed-off-by: Guangguan Wang Reviewed-by: Dust Li Signed-off-by: David S. Miller --- Documentation/networking/smc-sysctl.rst | 8 ++++++++ include/net/netns/smc.h | 1 + net/smc/af_smc.c | 2 +- net/smc/smc_clc.c | 10 +++++++--- net/smc/smc_clc.h | 3 ++- net/smc/smc_sysctl.c | 12 ++++++++++++ net/smc/smc_sysctl.h | 1 + 7 files changed, 32 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index 769149d98773..c6ef86ef4c4f 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -57,3 +57,11 @@ rmem - INTEGER only allowed 512KiB for SMC-R and 1MiB for SMC-D. Default: 64KiB + +smcr_max_links_per_lgr - INTEGER + Controls the max number of links can be added to a SMC-R link group. Notice that + the actual number of the links added to a SMC-R link group depends on the number + of RDMA devices exist in the system. The acceptable value ranges from 1 to 2. Only + for SMC-R v2.1 and later. + + Default: 2 diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index 582212ada3ba..da7023587824 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -22,5 +22,6 @@ struct netns_smc { int sysctl_smcr_testlink_time; int sysctl_wmem; int sysctl_rmem; + int sysctl_max_links_per_lgr; }; #endif diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 2a1388841951..c61666e5beed 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -2461,7 +2461,7 @@ static void smc_listen_work(struct work_struct *work) if (rc) goto out_decl; - rc = smc_clc_srv_v2x_features_validate(pclc, ini); + rc = smc_clc_srv_v2x_features_validate(new_smc, pclc, ini); if (rc) goto out_decl; diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 8deb46c28f1d..1f87c8895a27 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -824,6 +824,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) struct smc_clc_smcd_gid_chid *gidchids; struct smc_clc_msg_proposal_area *pclc; struct smc_clc_ipv6_prefix *ipv6_prfx; + struct net *net = sock_net(&smc->sk); struct smc_clc_v2_extension *v2_ext; struct smc_clc_msg_smcd *pclc_smcd; struct smc_clc_msg_trail *trl; @@ -944,7 +945,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER; - v2_ext->max_links = SMC_LINKS_PER_LGR_MAX_PREFER; + v2_ext->max_links = net->smc.sysctl_max_links_per_lgr; } pclc_base->hdr.length = htons(plen); @@ -1171,10 +1172,12 @@ int smc_clc_send_accept(struct smc_sock *new_smc, bool srv_first_contact, return len > 0 ? 0 : len; } -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini) { struct smc_clc_v2_extension *pclc_v2_ext; + struct net *net = sock_net(&smc->sk); ini->max_conns = SMC_CONN_PER_LGR_MAX; ini->max_links = SMC_LINKS_ADD_LNK_MAX; @@ -1192,7 +1195,8 @@ int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, if (ini->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; - ini->max_links = min_t(u8, pclc_v2_ext->max_links, SMC_LINKS_PER_LGR_MAX_PREFER); + ini->max_links = min_t(u8, pclc_v2_ext->max_links, + net->smc.sysctl_max_links_per_lgr); if (ini->max_links < SMC_LINKS_ADD_LNK_MIN) return SMC_CLC_DECL_MAXLINKERR; } diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index c5c8e7db775a..89b258cedffe 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -422,7 +422,8 @@ int smc_clc_send_confirm(struct smc_sock *smc, bool clnt_first_contact, u8 version, u8 *eid, struct smc_init_info *ini); int smc_clc_send_accept(struct smc_sock *smc, bool srv_first_contact, u8 version, u8 *negotiated_eid, struct smc_init_info *ini); -int smc_clc_srv_v2x_features_validate(struct smc_clc_msg_proposal *pclc, +int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, + struct smc_clc_msg_proposal *pclc, struct smc_init_info *ini); int smc_clc_clnt_v2x_features_validate(struct smc_clc_first_contact_ext *fce, struct smc_init_info *ini); diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 5cbc18c6e62b..3e9bb921e40a 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -25,6 +25,8 @@ static int max_sndbuf = INT_MAX / 2; static int max_rcvbuf = INT_MAX / 2; static const int net_smc_wmem_init = (64 * 1024); static const int net_smc_rmem_init = (64 * 1024); +static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; +static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; static struct ctl_table smc_table[] = { { @@ -68,6 +70,15 @@ static struct ctl_table smc_table[] = { .extra1 = &min_rcvbuf, .extra2 = &max_rcvbuf, }, + { + .procname = "smcr_max_links_per_lgr", + .data = &init_net.smc.sysctl_max_links_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &links_per_lgr_min, + .extra2 = &links_per_lgr_max, + }, { } }; @@ -97,6 +108,7 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; return 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index 0becc11bd2f4..5783dd7575dd 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -23,6 +23,7 @@ void __net_exit smc_sysctl_net_exit(struct net *net); static inline int smc_sysctl_net_init(struct net *net) { net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; + net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; return 0; } -- cgit v1.3.1 From 1f2c9dd73f0a279214f9b3c382b3f1df272b3253 Mon Sep 17 00:00:00 2001 From: Guangguan Wang Date: Wed, 22 Nov 2023 21:52:58 +0800 Subject: net/smc: add sysctl for max conns per lgr for SMC-R v2.1 Add a new sysctl: net.smc.smcr_max_conns_per_lgr, which is used to control the preferred max connections per lgr for SMC-R v2.1. The default value of this sysctl is 255, and the acceptable value ranges from 16 to 255. Signed-off-by: Guangguan Wang Reviewed-by: Dust Li Signed-off-by: David S. Miller --- Documentation/networking/smc-sysctl.rst | 6 ++++++ include/net/netns/smc.h | 1 + net/smc/smc_clc.c | 5 +++-- net/smc/smc_sysctl.c | 12 ++++++++++++ net/smc/smc_sysctl.h | 1 + 5 files changed, 23 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/smc-sysctl.rst b/Documentation/networking/smc-sysctl.rst index c6ef86ef4c4f..a874d007f2db 100644 --- a/Documentation/networking/smc-sysctl.rst +++ b/Documentation/networking/smc-sysctl.rst @@ -65,3 +65,9 @@ smcr_max_links_per_lgr - INTEGER for SMC-R v2.1 and later. Default: 2 + +smcr_max_conns_per_lgr - INTEGER + Controls the max number of connections can be added to a SMC-R link group. The + acceptable value ranges from 16 to 255. Only for SMC-R v2.1 and later. + + Default: 255 diff --git a/include/net/netns/smc.h b/include/net/netns/smc.h index da7023587824..fc752a50f91b 100644 --- a/include/net/netns/smc.h +++ b/include/net/netns/smc.h @@ -23,5 +23,6 @@ struct netns_smc { int sysctl_wmem; int sysctl_rmem; int sysctl_max_links_per_lgr; + int sysctl_max_conns_per_lgr; }; #endif diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 1f87c8895a27..0fda5156eef0 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -944,7 +944,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini) } if (smcr_indicated(ini->smc_type_v2)) { memcpy(v2_ext->roce, ini->smcrv2.ib_gid_v2, SMC_GID_SIZE); - v2_ext->max_conns = SMC_CONN_PER_LGR_PREFER; + v2_ext->max_conns = net->smc.sysctl_max_conns_per_lgr; v2_ext->max_links = net->smc.sysctl_max_links_per_lgr; } @@ -1191,7 +1191,8 @@ int smc_clc_srv_v2x_features_validate(struct smc_sock *smc, return SMC_CLC_DECL_NOV2EXT; if (ini->smcr_version & SMC_V2) { - ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, SMC_CONN_PER_LGR_PREFER); + ini->max_conns = min_t(u8, pclc_v2_ext->max_conns, + net->smc.sysctl_max_conns_per_lgr); if (ini->max_conns < SMC_CONN_PER_LGR_MIN) return SMC_CLC_DECL_MAXCONNERR; diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 3e9bb921e40a..a5946d1b9d60 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -27,6 +27,8 @@ static const int net_smc_wmem_init = (64 * 1024); static const int net_smc_rmem_init = (64 * 1024); static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; +static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; +static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; static struct ctl_table smc_table[] = { { @@ -79,6 +81,15 @@ static struct ctl_table smc_table[] = { .extra1 = &links_per_lgr_min, .extra2 = &links_per_lgr_max, }, + { + .procname = "smcr_max_conns_per_lgr", + .data = &init_net.smc.sysctl_max_conns_per_lgr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &conns_per_lgr_min, + .extra2 = &conns_per_lgr_max, + }, { } }; @@ -109,6 +120,7 @@ int __net_init smc_sysctl_net_init(struct net *net) WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; diff --git a/net/smc/smc_sysctl.h b/net/smc/smc_sysctl.h index 5783dd7575dd..eb2465ae1e15 100644 --- a/net/smc/smc_sysctl.h +++ b/net/smc/smc_sysctl.h @@ -24,6 +24,7 @@ static inline int smc_sysctl_net_init(struct net *net) { net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; + net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; return 0; } -- cgit v1.3.1 From dd043b393c8536574a95c567c49d237c4c604699 Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Wed, 22 Nov 2023 17:09:05 -0600 Subject: dt-bindings: net: qcom,ipa: add SM8550 compatible Add support for SM8550, which uses IPA v5.5. Signed-off-by: Alex Elder Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 2d5e4ffb2f9e..702eadccdf99 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -52,6 +52,7 @@ properties: - qcom,sdx65-ipa - qcom,sm6350-ipa - qcom,sm8350-ipa + - qcom,sm8550-ipa reg: items: -- cgit v1.3.1 From 839ff60df3ab92b81034ea3b859ab984eaa0c84c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:33 -0800 Subject: net: page_pool: add nlspec for basic access to page pools Add a Netlink spec in YAML for getting very basic information about page pools. Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 46 +++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 14511b13f305..84ca3c2ab872 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -86,6 +86,34 @@ attribute-sets: See Documentation/networking/xdp-rx-metadata.rst for more details. type: u64 enum: xdp-rx-metadata + - + name: page-pool + attributes: + - + name: id + doc: Unique ID of a Page Pool instance. + type: uint + checks: + min: 1 + max: u32-max + - + name: ifindex + doc: | + ifindex of the netdev to which the pool belongs. + May be reported as 0 if the page pool was allocated for a netdev + which got destroyed already (page pools may outlast their netdevs + because they wait for all memory to be returned). + type: u32 + checks: + min: 1 + max: s32-max + - + name: napi-id + doc: Id of NAPI using this Page Pool instance. + type: uint + checks: + min: 1 + max: u32-max operations: list: @@ -120,6 +148,24 @@ operations: doc: Notification about device configuration being changed. notify: dev-get mcgrp: mgmt + - + name: page-pool-get + doc: | + Get / dump information about Page Pools. + (Only Page Pools associated with a net_device can be listed.) + attribute-set: page-pool + do: + request: + attributes: + - id + reply: &pp-reply + attributes: + - id + - ifindex + - napi-id + dump: + reply: *pp-reply + config-cond: page-pool mcast-groups: list: -- cgit v1.3.1 From d2ef6aa077bdd0b3495dba5dcae6d3f19579b20b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:35 -0800 Subject: net: page_pool: add netlink notifications for state changes Generate netlink notifications about page pool state changes. Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 20 ++++++++++++++++++ include/uapi/linux/netdev.h | 4 ++++ net/core/netdev-genl-gen.c | 1 + net/core/netdev-genl-gen.h | 1 + net/core/page_pool_user.c | 36 +++++++++++++++++++++++++++++++++ 5 files changed, 62 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 84ca3c2ab872..82fbe81f7a49 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -166,8 +166,28 @@ operations: dump: reply: *pp-reply config-cond: page-pool + - + name: page-pool-add-ntf + doc: Notification about page pool appearing. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool + - + name: page-pool-del-ntf + doc: Notification about page pool disappearing. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool + - + name: page-pool-change-ntf + doc: Notification about page pool configuration being changed. + notify: page-pool-get + mcgrp: page-pool + config-cond: page-pool mcast-groups: list: - name: mgmt + - + name: page-pool diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 176665bcf0da..beb158872226 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -79,11 +79,15 @@ enum { NETDEV_CMD_DEV_DEL_NTF, NETDEV_CMD_DEV_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_GET, + NETDEV_CMD_PAGE_POOL_ADD_NTF, + NETDEV_CMD_PAGE_POOL_DEL_NTF, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) }; #define NETDEV_MCGRP_MGMT "mgmt" +#define NETDEV_MCGRP_PAGE_POOL "page-pool" #endif /* _UAPI_LINUX_NETDEV_H */ diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index bfde13981c77..47fb5e1b6369 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -60,6 +60,7 @@ static const struct genl_split_ops netdev_nl_ops[] = { static const struct genl_multicast_group netdev_nl_mcgrps[] = { [NETDEV_NLGRP_MGMT] = { "mgmt", }, + [NETDEV_NLGRP_PAGE_POOL] = { "page-pool", }, }; struct genl_family netdev_nl_family __ro_after_init = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index a011d12abff4..738097847100 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -19,6 +19,7 @@ int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb, enum { NETDEV_NLGRP_MGMT, + NETDEV_NLGRP_PAGE_POOL, }; extern struct genl_family netdev_nl_family; diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 7eb37c31fce9..1577fef880c9 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -135,6 +135,37 @@ err_cancel: return -EMSGSIZE; } +static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd) +{ + struct genl_info info; + struct sk_buff *ntf; + struct net *net; + + lockdep_assert_held(&page_pools_lock); + + /* 'invisible' page pools don't matter */ + if (hlist_unhashed(&pool->user.list)) + return; + net = dev_net(pool->slow.netdev); + + if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL)) + return; + + genl_info_init_ntf(&info, &netdev_nl_family, cmd); + + ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!ntf) + return; + + if (page_pool_nl_fill(ntf, pool, &info)) { + nlmsg_free(ntf); + return; + } + + genlmsg_multicast_netns(&netdev_nl_family, net, ntf, + 0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL); +} + int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info) { u32 id; @@ -168,6 +199,8 @@ int page_pool_list(struct page_pool *pool) hlist_add_head(&pool->user.list, &pool->slow.netdev->page_pools); pool->user.napi_id = pool->p.napi ? pool->p.napi->napi_id : 0; + + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF); } mutex_unlock(&page_pools_lock); @@ -181,6 +214,7 @@ err_unlock: void page_pool_unlist(struct page_pool *pool) { mutex_lock(&page_pools_lock); + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF); xa_erase(&page_pools, pool->user.id); hlist_del(&pool->user.list); mutex_unlock(&page_pools_lock); @@ -210,6 +244,8 @@ static void page_pool_unreg_netdev(struct net_device *netdev) last = NULL; hlist_for_each_entry(pool, &netdev->page_pools, user.list) { pool->slow.netdev = lo; + netdev_nl_page_pool_event(pool, + NETDEV_CMD_PAGE_POOL_CHANGE_NTF); last = pool; } if (last) -- cgit v1.3.1 From 7aee8429eedd0970d8add2fb5b856bfc5f5f1fc1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:36 -0800 Subject: net: page_pool: report amount of memory held by page pools Advanced deployments need the ability to check memory use of various system components. It makes it possible to make informed decisions about memory allocation and to find regressions and leaks. Report memory use of page pools. Report both number of references and bytes held. Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 15 +++++++++++++++ include/uapi/linux/netdev.h | 2 ++ net/core/page_pool.c | 13 +++++++++---- net/core/page_pool_priv.h | 2 ++ net/core/page_pool_user.c | 8 ++++++++ 5 files changed, 36 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 82fbe81f7a49..b76623ff2932 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -114,6 +114,19 @@ attribute-sets: checks: min: 1 max: u32-max + - + name: inflight + type: uint + doc: | + Number of outstanding references to this page pool (allocated + but yet to be freed pages). Allocated pages may be held in + socket receive queues, driver receive ring, page pool recycling + ring, the page pool cache, etc. + - + name: inflight-mem + type: uint + doc: | + Amount of memory held by inflight pages. operations: list: @@ -163,6 +176,8 @@ operations: - id - ifindex - napi-id + - inflight + - inflight-mem dump: reply: *pp-reply config-cond: page-pool diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index beb158872226..26ae5bdd3187 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -68,6 +68,8 @@ enum { NETDEV_A_PAGE_POOL_ID = 1, NETDEV_A_PAGE_POOL_IFINDEX, NETDEV_A_PAGE_POOL_NAPI_ID, + NETDEV_A_PAGE_POOL_INFLIGHT, + NETDEV_A_PAGE_POOL_INFLIGHT_MEM, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a8d96ea38d18..566390759294 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -529,7 +529,7 @@ EXPORT_SYMBOL(page_pool_alloc_pages); */ #define _distance(a, b) (s32)((a) - (b)) -static s32 page_pool_inflight(struct page_pool *pool) +s32 page_pool_inflight(const struct page_pool *pool, bool strict) { u32 release_cnt = atomic_read(&pool->pages_state_release_cnt); u32 hold_cnt = READ_ONCE(pool->pages_state_hold_cnt); @@ -537,8 +537,13 @@ static s32 page_pool_inflight(struct page_pool *pool) inflight = _distance(hold_cnt, release_cnt); - trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); - WARN(inflight < 0, "Negative(%d) inflight packet-pages", inflight); + if (strict) { + trace_page_pool_release(pool, inflight, hold_cnt, release_cnt); + WARN(inflight < 0, "Negative(%d) inflight packet-pages", + inflight); + } else { + inflight = max(0, inflight); + } return inflight; } @@ -881,7 +886,7 @@ static int page_pool_release(struct page_pool *pool) int inflight; page_pool_scrub(pool); - inflight = page_pool_inflight(pool); + inflight = page_pool_inflight(pool, true); if (!inflight) __page_pool_destroy(pool); diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index c17ea092b4ab..72fb21ea1ddc 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -3,6 +3,8 @@ #ifndef __PAGE_POOL_PRIV_H #define __PAGE_POOL_PRIV_H +s32 page_pool_inflight(const struct page_pool *pool, bool strict); + int page_pool_list(struct page_pool *pool); void page_pool_unlist(struct page_pool *pool); diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 1577fef880c9..2db71e718485 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -110,6 +110,7 @@ static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) { + size_t inflight, refsz; void *hdr; hdr = genlmsg_iput(rsp, info); @@ -127,6 +128,13 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, pool->user.napi_id)) goto err_cancel; + inflight = page_pool_inflight(pool, false); + refsz = PAGE_SIZE << pool->p.order; + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + inflight * refsz)) + goto err_cancel; + genlmsg_end(rsp, hdr); return 0; -- cgit v1.3.1 From 69cb4952b6f6a226c1c0a7ca400398aaa8f75cf2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:37 -0800 Subject: net: page_pool: report when page pool was destroyed Report when page pool was destroyed. Together with the inflight / memory use reporting this can serve as a replacement for the warning about leaked page pools we currently print to dmesg. Example output for a fake leaked page pool using some hacks in netdevsim (one "live" pool, and one "leaked" on the same dev): $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-get [{'id': 2, 'ifindex': 3}, {'id': 1, 'ifindex': 3, 'destroyed': 133, 'inflight': 1}] Tested-by: Dragos Tatulea Reviewed-by: Eric Dumazet Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 13 +++++++++++++ include/net/page_pool/types.h | 1 + include/uapi/linux/netdev.h | 1 + net/core/page_pool.c | 1 + net/core/page_pool_priv.h | 1 + net/core/page_pool_user.c | 12 ++++++++++++ 6 files changed, 29 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index b76623ff2932..b5f715cf9e06 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -127,6 +127,18 @@ attribute-sets: type: uint doc: | Amount of memory held by inflight pages. + - + name: detach-time + type: uint + doc: | + Seconds in CLOCK_BOOTTIME of when Page Pool was detached by + the driver. Once detached Page Pool can no longer be used to + allocate memory. + Page Pools wait for all the memory allocated from them to be freed + before truly disappearing. "Detached" Page Pools cannot be + "re-attached", they are just waiting to disappear. + Attribute is absent if Page Pool has not been detached, and + can still be used to allocate new memory. operations: list: @@ -178,6 +190,7 @@ operations: - napi-id - inflight - inflight-mem + - detach-time dump: reply: *pp-reply config-cond: page-pool diff --git a/include/net/page_pool/types.h b/include/net/page_pool/types.h index 7e47d7bb2c1e..ac286ea8ce2d 100644 --- a/include/net/page_pool/types.h +++ b/include/net/page_pool/types.h @@ -193,6 +193,7 @@ struct page_pool { /* User-facing fields, protected by page_pools_lock */ struct { struct hlist_node list; + u64 detach_time; u32 napi_id; u32 id; } user; diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 26ae5bdd3187..756410274120 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -70,6 +70,7 @@ enum { NETDEV_A_PAGE_POOL_NAPI_ID, NETDEV_A_PAGE_POOL_INFLIGHT, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, + NETDEV_A_PAGE_POOL_DETACH_TIME, __NETDEV_A_PAGE_POOL_MAX, NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 566390759294..a821fb5fe054 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -953,6 +953,7 @@ void page_pool_destroy(struct page_pool *pool) if (!page_pool_release(pool)) return; + page_pool_detached(pool); pool->defer_start = jiffies; pool->defer_warn = jiffies + DEFER_WARN_INTERVAL; diff --git a/net/core/page_pool_priv.h b/net/core/page_pool_priv.h index 72fb21ea1ddc..90665d40f1eb 100644 --- a/net/core/page_pool_priv.h +++ b/net/core/page_pool_priv.h @@ -6,6 +6,7 @@ s32 page_pool_inflight(const struct page_pool *pool, bool strict); int page_pool_list(struct page_pool *pool); +void page_pool_detached(struct page_pool *pool); void page_pool_unlist(struct page_pool *pool); #endif diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index 2db71e718485..bd5ca94f683f 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -134,6 +134,10 @@ page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM, inflight * refsz)) goto err_cancel; + if (pool->user.detach_time && + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME, + pool->user.detach_time)) + goto err_cancel; genlmsg_end(rsp, hdr); @@ -219,6 +223,14 @@ err_unlock: return err; } +void page_pool_detached(struct page_pool *pool) +{ + mutex_lock(&page_pools_lock); + pool->user.detach_time = ktime_get_boottime_seconds(); + netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF); + mutex_unlock(&page_pools_lock); +} + void page_pool_unlist(struct page_pool *pool) { mutex_lock(&page_pools_lock); -- cgit v1.3.1 From d49010adae737638447369a4eff8f1aab736b076 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 26 Nov 2023 15:07:38 -0800 Subject: net: page_pool: expose page pool stats via netlink Dump the stats into netlink. More clever approaches like dumping the stats per-CPU for each CPU individually to see where the packets get consumed can be implemented in the future. A trimmed example from a real (but recently booted system): $ ./cli.py --no-schema --spec netlink/specs/netdev.yaml \ --dump page-pool-stats-get [{'info': {'id': 19, 'ifindex': 2}, 'alloc-empty': 48, 'alloc-fast': 3024, 'alloc-refill': 0, 'alloc-slow': 48, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 0, 'recycle-cached': 0, 'recycle-released-refcnt': 0, 'recycle-ring': 0, 'recycle-ring-full': 0}, {'info': {'id': 18, 'ifindex': 2}, 'alloc-empty': 66, 'alloc-fast': 11811, 'alloc-refill': 35, 'alloc-slow': 66, 'alloc-slow-high-order': 0, 'alloc-waive': 0, 'recycle-cache-full': 1145, 'recycle-cached': 6541, 'recycle-released-refcnt': 0, 'recycle-ring': 1275, 'recycle-ring-full': 0}, {'info': {'id': 17, 'ifindex': 2}, 'alloc-empty': 73, 'alloc-fast': 62099, 'alloc-refill': 413, ... Acked-by: Jesper Dangaard Brouer Signed-off-by: Jakub Kicinski Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/netdev.yaml | 78 ++++++++++++++++++++++++ Documentation/networking/page_pool.rst | 10 +++- include/net/page_pool/helpers.h | 8 +-- include/uapi/linux/netdev.h | 19 ++++++ net/core/netdev-genl-gen.c | 32 ++++++++++ net/core/netdev-genl-gen.h | 7 +++ net/core/page_pool.c | 2 +- net/core/page_pool_user.c | 103 ++++++++++++++++++++++++++++++++ 8 files changed, 250 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index b5f715cf9e06..20f75b7d3240 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -139,6 +139,59 @@ attribute-sets: "re-attached", they are just waiting to disappear. Attribute is absent if Page Pool has not been detached, and can still be used to allocate new memory. + - + name: page-pool-info + subset-of: page-pool + attributes: + - + name: id + - + name: ifindex + - + name: page-pool-stats + doc: | + Page pool statistics, see docs for struct page_pool_stats + for information about individual statistics. + attributes: + - + name: info + doc: Page pool identifying information. + type: nest + nested-attributes: page-pool-info + - + name: alloc-fast + type: uint + value: 8 # reserve some attr ids in case we need more metadata later + - + name: alloc-slow + type: uint + - + name: alloc-slow-high-order + type: uint + - + name: alloc-empty + type: uint + - + name: alloc-refill + type: uint + - + name: alloc-waive + type: uint + - + name: recycle-cached + type: uint + - + name: recycle-cache-full + type: uint + - + name: recycle-ring + type: uint + - + name: recycle-ring-full + type: uint + - + name: recycle-released-refcnt + type: uint operations: list: @@ -212,6 +265,31 @@ operations: notify: page-pool-get mcgrp: page-pool config-cond: page-pool + - + name: page-pool-stats-get + doc: Get page pool statistics. + attribute-set: page-pool-stats + do: + request: + attributes: + - info + reply: &pp-stats-reply + attributes: + - info + - alloc-fast + - alloc-slow + - alloc-slow-high-order + - alloc-empty + - alloc-refill + - alloc-waive + - recycle-cached + - recycle-cache-full + - recycle-ring + - recycle-ring-full + - recycle-released-refcnt + dump: + reply: *pp-stats-reply + config-cond: page-pool-stats mcast-groups: list: diff --git a/Documentation/networking/page_pool.rst b/Documentation/networking/page_pool.rst index 60993cb56b32..9d958128a57c 100644 --- a/Documentation/networking/page_pool.rst +++ b/Documentation/networking/page_pool.rst @@ -41,6 +41,11 @@ Architecture overview | Fast cache | | ptr-ring cache | +-----------------+ +------------------+ +Monitoring +========== +Information about page pools on the system can be accessed via the netdev +genetlink family (see Documentation/netlink/specs/netdev.yaml). + API interface ============= The number of pools created **must** match the number of hardware queues @@ -107,8 +112,9 @@ page_pool_get_stats() and structures described below are available. It takes a pointer to a ``struct page_pool`` and a pointer to a struct page_pool_stats allocated by the caller. -The API will fill in the provided struct page_pool_stats with -statistics about the page_pool. +Older drivers expose page pool statistics via ethtool or debugfs. +The same statistics are accessible via the netlink netdev family +in a driver-independent fashion. .. kernel-doc:: include/net/page_pool/types.h :identifiers: struct page_pool_recycle_stats diff --git a/include/net/page_pool/helpers.h b/include/net/page_pool/helpers.h index 4ebd544ae977..7dc65774cde5 100644 --- a/include/net/page_pool/helpers.h +++ b/include/net/page_pool/helpers.h @@ -55,16 +55,12 @@ #include #ifdef CONFIG_PAGE_POOL_STATS +/* Deprecated driver-facing API, use netlink instead */ int page_pool_ethtool_stats_get_count(void); u8 *page_pool_ethtool_stats_get_strings(u8 *data); u64 *page_pool_ethtool_stats_get(u64 *data, void *stats); -/* - * Drivers that wish to harvest page pool stats and report them to users - * (perhaps via ethtool, debugfs, or another mechanism) can allocate a - * struct page_pool_stats call page_pool_get_stats to get stats for the specified pool. - */ -bool page_pool_get_stats(struct page_pool *pool, +bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats); #else static inline int page_pool_ethtool_stats_get_count(void) diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 756410274120..2b37233e00c0 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -76,6 +76,24 @@ enum { NETDEV_A_PAGE_POOL_MAX = (__NETDEV_A_PAGE_POOL_MAX - 1) }; +enum { + NETDEV_A_PAGE_POOL_STATS_INFO = 1, + NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST = 8, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + + __NETDEV_A_PAGE_POOL_STATS_MAX, + NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -85,6 +103,7 @@ enum { NETDEV_CMD_PAGE_POOL_ADD_NTF, NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, + NETDEV_CMD_PAGE_POOL_STATS_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index 47fb5e1b6369..dccd8c3a141e 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -16,6 +16,17 @@ static const struct netlink_range_validation netdev_a_page_pool_id_range = { .max = 4294967295ULL, }; +static const struct netlink_range_validation netdev_a_page_pool_ifindex_range = { + .min = 1ULL, + .max = 2147483647ULL, +}; + +/* Common nested types */ +const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { + [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), + [NETDEV_A_PAGE_POOL_IFINDEX] = NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), +}; + /* NETDEV_CMD_DEV_GET - do */ static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = { [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), @@ -28,6 +39,13 @@ static const struct nla_policy netdev_page_pool_get_nl_policy[NETDEV_A_PAGE_POOL }; #endif /* CONFIG_PAGE_POOL */ +/* NETDEV_CMD_PAGE_POOL_STATS_GET - do */ +#ifdef CONFIG_PAGE_POOL_STATS +static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAGE_POOL_STATS_INFO + 1] = { + [NETDEV_A_PAGE_POOL_STATS_INFO] = NLA_POLICY_NESTED(netdev_page_pool_info_nl_policy), +}; +#endif /* CONFIG_PAGE_POOL_STATS */ + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -56,6 +74,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .flags = GENL_CMD_CAP_DUMP, }, #endif /* CONFIG_PAGE_POOL */ +#ifdef CONFIG_PAGE_POOL_STATS + { + .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET, + .doit = netdev_nl_page_pool_stats_get_doit, + .policy = netdev_page_pool_stats_get_nl_policy, + .maxattr = NETDEV_A_PAGE_POOL_STATS_INFO, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_PAGE_POOL_STATS_GET, + .dumpit = netdev_nl_page_pool_stats_get_dumpit, + .flags = GENL_CMD_CAP_DUMP, + }, +#endif /* CONFIG_PAGE_POOL_STATS */ }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 738097847100..649e4b46eccf 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -11,11 +11,18 @@ #include +/* Common nested types */ +extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; + int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, + struct genl_info *info); +int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/page_pool.c b/net/core/page_pool.c index a821fb5fe054..3d0938a60646 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -71,7 +71,7 @@ static const char pp_stats[][ETH_GSTRING_LEN] = { * is passed to this API which is filled in. The caller can then report * those stats to the user (perhaps via ethtool, debugfs, etc.). */ -bool page_pool_get_stats(struct page_pool *pool, +bool page_pool_get_stats(const struct page_pool *pool, struct page_pool_stats *stats) { int cpu = 0; diff --git a/net/core/page_pool_user.c b/net/core/page_pool_user.c index bd5ca94f683f..1426434a7e15 100644 --- a/net/core/page_pool_user.c +++ b/net/core/page_pool_user.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "page_pool_priv.h" @@ -106,6 +107,108 @@ netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb, return err; } +static int +page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool, + const struct genl_info *info) +{ +#ifdef CONFIG_PAGE_POOL_STATS + struct page_pool_stats stats = {}; + struct nlattr *nest; + void *hdr; + + if (!page_pool_get_stats(pool, &stats)) + return 0; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) + return -EMSGSIZE; + + nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO); + + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) || + (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX && + nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX, + pool->slow.netdev->ifindex))) + goto err_cancel_nest; + + nla_nest_end(rsp, nest); + + if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST, + stats.alloc_stats.fast) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW, + stats.alloc_stats.slow) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER, + stats.alloc_stats.slow_high_order) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY, + stats.alloc_stats.empty) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL, + stats.alloc_stats.refill) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE, + stats.alloc_stats.waive) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED, + stats.recycle_stats.cached) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL, + stats.recycle_stats.cache_full) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING, + stats.recycle_stats.ring) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL, + stats.recycle_stats.ring_full) || + nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT, + stats.recycle_stats.released_refcnt)) + goto err_cancel_msg; + + genlmsg_end(rsp, hdr); + + return 0; +err_cancel_nest: + nla_nest_cancel(rsp, nest); +err_cancel_msg: + genlmsg_cancel(rsp, hdr); + return -EMSGSIZE; +#else + GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS"); + return -EOPNOTSUPP; +#endif +} + +int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)]; + struct nlattr *nest; + int err; + u32 id; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO)) + return -EINVAL; + + nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO]; + err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest, + netdev_page_pool_info_nl_policy, + info->extack); + if (err) + return err; + + if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID)) + return -EINVAL; + if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) { + NL_SET_ERR_MSG_ATTR(info->extack, + tb[NETDEV_A_PAGE_POOL_IFINDEX], + "selecting by ifindex not supported"); + return -EINVAL; + } + + id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]); + + return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill); +} + +int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill); +} + static int page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool, const struct genl_info *info) -- cgit v1.3.1 From 48eb03dd26304c24f03bdbb9382e89c8564e71df Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:08 -0800 Subject: xsk: Add TX timestamp and TX checksum offload support This change actually defines the (initial) metadata layout that should be used by AF_XDP userspace (xsk_tx_metadata). The first field is flags which requests appropriate offloads, followed by the offload-specific fields. The supported per-device offloads are exported via netlink (new xsk-flags). The offloads themselves are still implemented in a bit of a framework-y fashion that's left from my initial kfunc attempt. I'm introducing new xsk_tx_metadata_ops which drivers are supposed to implement. The drivers are also supposed to call xsk_tx_metadata_request/xsk_tx_metadata_complete in the right places. Since xsk_tx_metadata_{request,_complete} are static inline, we don't incur any extra overhead doing indirect calls. The benefit of this scheme is as follows: - keeps all metadata layout parsing away from driver code - makes it easy to grep and see which drivers implement what - don't need any extra flags to maintain to keep track of what offloads are implemented; if the callback is implemented - the offload is supported (used by netlink reporting code) Two offloads are defined right now: 1. XDP_TXMD_FLAGS_CHECKSUM: skb-style csum_start+csum_offset 2. XDP_TXMD_FLAGS_TIMESTAMP: writes TX timestamp back into metadata area upon completion (tx_timestamp field) XDP_TXMD_FLAGS_TIMESTAMP is also implemented for XDP_COPY mode: it writes SW timestamp from the skb destructor (note I'm reusing hwtstamps to pass metadata pointer). The struct is forward-compatible and can be extended in the future by appending more fields. Reviewed-by: Song Yoong Siang Signed-off-by: Stanislav Fomichev Acked-by: Jakub Kicinski Link: https://lore.kernel.org/r/20231127190319.1190813-3-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 19 +++++- include/linux/netdevice.h | 2 + include/linux/skbuff.h | 14 +++- include/net/xdp_sock.h | 110 ++++++++++++++++++++++++++++++++ include/net/xdp_sock_drv.h | 13 ++++ include/net/xsk_buff_pool.h | 6 ++ include/uapi/linux/if_xdp.h | 38 +++++++++++ include/uapi/linux/netdev.h | 16 +++++ net/core/netdev-genl.c | 13 +++- net/xdp/xsk.c | 34 ++++++++++ net/xdp/xsk_queue.h | 2 +- tools/include/uapi/linux/if_xdp.h | 52 +++++++++++++-- tools/include/uapi/linux/netdev.h | 16 +++++ tools/net/ynl/generated/netdev-user.c | 19 ++++++ tools/net/ynl/generated/netdev-user.h | 3 + 15 files changed, 348 insertions(+), 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 14511b13f305..00439bcbd2e3 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -45,7 +45,6 @@ definitions: - type: flags name: xdp-rx-metadata - render-max: true entries: - name: timestamp @@ -55,6 +54,18 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + type: flags + name: xsk-flags + entries: + - + name: tx-timestamp + doc: + HW timestamping egress packets is supported by the driver. + - + name: tx-checksum + doc: + L3 checksum HW offload is supported by the driver. attribute-sets: - @@ -86,6 +97,11 @@ attribute-sets: See Documentation/networking/xdp-rx-metadata.rst for more details. type: u64 enum: xdp-rx-metadata + - + name: xsk-features + doc: Bitmask of enabled AF_XDP features. + type: u64 + enum: xsk-flags operations: list: @@ -103,6 +119,7 @@ operations: - xdp-features - xdp-zc-max-segs - xdp-rx-metadata-features + - xsk-features dump: reply: *dev-all - diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e87caa81f70c..08da8b28c816 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1865,6 +1865,7 @@ enum netdev_stat_type { * @netdev_ops: Includes several pointers to callbacks, * if one wants to override the ndo_*() functions * @xdp_metadata_ops: Includes pointers to XDP metadata callbacks. + * @xsk_tx_metadata_ops: Includes pointers to AF_XDP TX metadata callbacks. * @ethtool_ops: Management operations * @l3mdev_ops: Layer 3 master device operations * @ndisc_ops: Includes callbacks for different IPv6 neighbour @@ -2128,6 +2129,7 @@ struct net_device { unsigned long long priv_flags; const struct net_device_ops *netdev_ops; const struct xdp_metadata_ops *xdp_metadata_ops; + const struct xsk_tx_metadata_ops *xsk_tx_metadata_ops; int ifindex; unsigned short gflags; unsigned short hard_header_len; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 27998f73183e..b370eb8d70f7 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -566,6 +566,15 @@ struct ubuf_info_msgzc { int mm_account_pinned_pages(struct mmpin *mmp, size_t size); void mm_unaccount_pinned_pages(struct mmpin *mmp); +/* Preserve some data across TX submission and completion. + * + * Note, this state is stored in the driver. Extending the layout + * might need some special care. + */ +struct xsk_tx_metadata_compl { + __u64 *tx_timestamp; +}; + /* This data is invariant across clones and lives at * the end of the header data, ie. at skb->end. */ @@ -578,7 +587,10 @@ struct skb_shared_info { /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; struct sk_buff *frag_list; - struct skb_shared_hwtstamps hwtstamps; + union { + struct skb_shared_hwtstamps hwtstamps; + struct xsk_tx_metadata_compl xsk_meta; + }; unsigned int gso_type; u32 tskey; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index bcf765124f72..3cb4dc9bd70e 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -93,12 +93,105 @@ struct xdp_sock { struct xsk_queue *cq_tmp; /* Only as tmp storage before bind */ }; +/* + * AF_XDP TX metadata hooks for network devices. + * The following hooks can be defined; unless noted otherwise, they are + * optional and can be filled with a null pointer. + * + * void (*tmo_request_timestamp)(void *priv) + * Called when AF_XDP frame requested egress timestamp. + * + * u64 (*tmo_fill_timestamp)(void *priv) + * Called when AF_XDP frame, that had requested egress timestamp, + * received a completion. The hook needs to return the actual HW timestamp. + * + * void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv) + * Called when AF_XDP frame requested HW checksum offload. csum_start + * indicates position where checksumming should start. + * csum_offset indicates position where checksum should be stored. + * + */ +struct xsk_tx_metadata_ops { + void (*tmo_request_timestamp)(void *priv); + u64 (*tmo_fill_timestamp)(void *priv); + void (*tmo_request_checksum)(u16 csum_start, u16 csum_offset, void *priv); +}; + #ifdef CONFIG_XDP_SOCKETS int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp); int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp); void __xsk_map_flush(void); +/** + * xsk_tx_metadata_to_compl - Save enough relevant metadata information + * to perform tx completion in the future. + * @meta: pointer to AF_XDP metadata area + * @compl: pointer to output struct xsk_tx_metadata_to_compl + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. The value of @compl should be stored + * and passed to xsk_tx_metadata_complete upon TX completion. + */ +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ + if (!meta) + return; + + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + compl->tx_timestamp = &meta->completion.tx_timestamp; + else + compl->tx_timestamp = NULL; +} + +/** + * xsk_tx_metadata_request - Evaluate AF_XDP TX metadata at submission + * and call appropriate xsk_tx_metadata_ops operation. + * @meta: pointer to AF_XDP metadata area + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device when + * it prepares AF_XDP egress packet. + */ +static inline void xsk_tx_metadata_request(const struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!meta) + return; + + if (ops->tmo_request_timestamp) + if (meta->flags & XDP_TXMD_FLAGS_TIMESTAMP) + ops->tmo_request_timestamp(priv); + + if (ops->tmo_request_checksum) + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) + ops->tmo_request_checksum(meta->request.csum_start, + meta->request.csum_offset, priv); +} + +/** + * xsk_tx_metadata_complete - Evaluate AF_XDP TX metadata at completion + * and call appropriate xsk_tx_metadata_ops operation. + * @compl: pointer to completion metadata produced from xsk_tx_metadata_to_compl + * @ops: pointer to struct xsk_tx_metadata_ops + * @priv: pointer to driver-private aread + * + * This function should be called by the networking device upon + * AF_XDP egress completion. + */ +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ + if (!compl) + return; + + *compl->tx_timestamp = ops->tmo_fill_timestamp(priv); +} + #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) @@ -115,6 +208,23 @@ static inline void __xsk_map_flush(void) { } +static inline void xsk_tx_metadata_to_compl(struct xsk_tx_metadata *meta, + struct xsk_tx_metadata_compl *compl) +{ +} + +static inline void xsk_tx_metadata_request(struct xsk_tx_metadata *meta, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + +static inline void xsk_tx_metadata_complete(struct xsk_tx_metadata_compl *compl, + const struct xsk_tx_metadata_ops *ops, + void *priv) +{ +} + #endif /* CONFIG_XDP_SOCKETS */ #if defined(CONFIG_XDP_SOCKETS) && defined(CONFIG_DEBUG_NET) diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h index 1f6fc8c7a84c..e2558ac3e195 100644 --- a/include/net/xdp_sock_drv.h +++ b/include/net/xdp_sock_drv.h @@ -165,6 +165,14 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return xp_raw_get_data(pool, addr); } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + if (!pool->tx_metadata_len) + return NULL; + + return xp_raw_get_data(pool, addr) - pool->tx_metadata_len; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp); @@ -324,6 +332,11 @@ static inline void *xsk_buff_raw_get_data(struct xsk_buff_pool *pool, u64 addr) return NULL; } +static inline struct xsk_tx_metadata *xsk_buff_get_metadata(struct xsk_buff_pool *pool, u64 addr) +{ + return NULL; +} + static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp, struct xsk_buff_pool *pool) { } diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 1985ffaf9b0c..97f5cc10d79e 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -33,6 +33,7 @@ struct xdp_buff_xsk { }; #define XSK_CHECK_PRIV_TYPE(t) BUILD_BUG_ON(sizeof(t) > offsetofend(struct xdp_buff_xsk, cb)) +#define XSK_TX_COMPL_FITS(t) BUILD_BUG_ON(sizeof(struct xsk_tx_metadata_compl) > sizeof(t)) struct xsk_dma_map { dma_addr_t *dma_pages; @@ -234,4 +235,9 @@ static inline u64 xp_get_handle(struct xdp_buff_xsk *xskb) return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT); } +static inline bool xp_tx_metadata_enabled(const struct xsk_buff_pool *pool) +{ + return pool->tx_metadata_len > 0; +} + #endif /* XSK_BUFF_POOL_H_ */ diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 2ecf79282c26..95de66d5a26c 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of struct xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of struct + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -122,4 +157,7 @@ struct xdp_desc { */ #define XDP_PKT_CONTD (1 << 0) +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) + #endif /* _LINUX_IF_XDP_H */ diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index fe61f85bcf33..10f2124e9e23 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "netdev-genl-gen.h" @@ -13,6 +14,7 @@ static int netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, const struct genl_info *info) { + u64 xsk_features = 0; u64 xdp_rx_meta = 0; void *hdr; @@ -26,11 +28,20 @@ netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp, XDP_METADATA_KFUNC_xxx #undef XDP_METADATA_KFUNC + if (netdev->xsk_tx_metadata_ops) { + if (netdev->xsk_tx_metadata_ops->tmo_fill_timestamp) + xsk_features |= NETDEV_XSK_FLAGS_TX_TIMESTAMP; + if (netdev->xsk_tx_metadata_ops->tmo_request_checksum) + xsk_features |= NETDEV_XSK_FLAGS_TX_CHECKSUM; + } + if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES, netdev->xdp_features, NETDEV_A_DEV_PAD) || nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, - xdp_rx_meta, NETDEV_A_DEV_PAD)) { + xdp_rx_meta, NETDEV_A_DEV_PAD) || + nla_put_u64_64bit(rsp, NETDEV_A_DEV_XSK_FEATURES, + xsk_features, NETDEV_A_DEV_PAD)) { genlmsg_cancel(rsp, hdr); return -EINVAL; } diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index c904356e2800..e83ade32f1fd 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -571,6 +571,13 @@ static u32 xsk_get_num_desc(struct sk_buff *skb) static void xsk_destruct_skb(struct sk_buff *skb) { + struct xsk_tx_metadata_compl *compl = &skb_shinfo(skb)->xsk_meta; + + if (compl->tx_timestamp) { + /* sw completion timestamp, not a real one */ + *compl->tx_timestamp = ktime_get_tai_fast_ns(); + } + xsk_cq_submit_locked(xdp_sk(skb->sk), xsk_get_num_desc(skb)); sock_wfree(skb); } @@ -655,8 +662,10 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs, static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, struct xdp_desc *desc) { + struct xsk_tx_metadata *meta = NULL; struct net_device *dev = xs->dev; struct sk_buff *skb = xs->skb; + bool first_frag = false; int err; if (dev->priv_flags & IFF_TX_SKB_NO_LINEAR) { @@ -687,6 +696,8 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, kfree_skb(skb); goto free_err; } + + first_frag = true; } else { int nr_frags = skb_shinfo(skb)->nr_frags; struct page *page; @@ -709,12 +720,35 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb_add_rx_frag(skb, nr_frags, page, 0, len, 0); } + + if (first_frag && desc->options & XDP_TX_METADATA) { + if (unlikely(xs->pool->tx_metadata_len == 0)) { + err = -EINVAL; + goto free_err; + } + + meta = buffer - xs->pool->tx_metadata_len; + + if (meta->flags & XDP_TXMD_FLAGS_CHECKSUM) { + if (unlikely(meta->request.csum_start + + meta->request.csum_offset + + sizeof(__sum16) > len)) { + err = -EINVAL; + goto free_err; + } + + skb->csum_start = hr + meta->request.csum_start; + skb->csum_offset = meta->request.csum_offset; + skb->ip_summed = CHECKSUM_PARTIAL; + } + } } skb->dev = dev; skb->priority = READ_ONCE(xs->sk.sk_priority); skb->mark = READ_ONCE(xs->sk.sk_mark); skb->destructor = xsk_destruct_skb; + xsk_tx_metadata_to_compl(meta, &skb_shinfo(skb)->xsk_meta); xsk_set_destructor_arg(skb); return skb; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index c74a1372bcb9..6f2d1621c992 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -137,7 +137,7 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr) static inline bool xp_unused_options_set(u32 options) { - return options & ~XDP_PKT_CONTD; + return options & ~(XDP_PKT_CONTD | XDP_TX_METADATA); } static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool, diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index 34411a2e5b6c..d0882edc1642 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -26,11 +26,11 @@ */ #define XDP_USE_NEED_WAKEUP (1 << 3) /* By setting this option, userspace application indicates that it can - * handle multiple descriptors per packet thus enabling xsk core to split + * handle multiple descriptors per packet thus enabling AF_XDP to split * multi-buffer XDP frames into multiple Rx descriptors. Without this set - * such frames will be dropped by xsk. + * such frames will be dropped. */ -#define XDP_USE_SG (1 << 4) +#define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ #define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) @@ -106,6 +106,41 @@ struct xdp_options { #define XSK_UNALIGNED_BUF_ADDR_MASK \ ((1ULL << XSK_UNALIGNED_BUF_OFFSET_SHIFT) - 1) +/* Request transmit timestamp. Upon completion, put it into tx_timestamp + * field of union xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_TIMESTAMP (1 << 0) + +/* Request transmit checksum offload. Checksum start position and offset + * are communicated via csum_start and csum_offset fields of union + * xsk_tx_metadata. + */ +#define XDP_TXMD_FLAGS_CHECKSUM (1 << 1) + +/* AF_XDP offloads request. 'request' union member is consumed by the driver + * when the packet is being transmitted. 'completion' union member is + * filled by the driver when the transmit completion arrives. + */ +struct xsk_tx_metadata { + __u64 flags; + + union { + struct { + /* XDP_TXMD_FLAGS_CHECKSUM */ + + /* Offset from desc->addr where checksumming should start. */ + __u16 csum_start; + /* Offset from csum_start where checksum should be stored. */ + __u16 csum_offset; + } request; + + struct { + /* XDP_TXMD_FLAGS_TIMESTAMP */ + __u64 tx_timestamp; + } completion; + }; +}; + /* Rx/Tx descriptor */ struct xdp_desc { __u64 addr; @@ -113,9 +148,16 @@ struct xdp_desc { __u32 options; }; -/* Flag indicating packet constitutes of multiple buffers*/ +/* UMEM descriptor is __u64 */ + +/* Flag indicating that the packet continues with the buffer pointed out by the + * next frame in the ring. The end of the packet is signalled by setting this + * bit to zero. For single buffer packets, every descriptor has 'options' set + * to 0 and this maintains backward compatibility. + */ #define XDP_PKT_CONTD (1 << 0) -/* UMEM descriptor is __u64 */ +/* TX packet carries valid metadata. */ +#define XDP_TX_METADATA (1 << 1) #endif /* _LINUX_IF_XDP_H */ diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 2943a151d4f1..48d5477a668c 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -53,12 +53,28 @@ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_MASK = 3, }; +/** + * enum netdev_xsk_flags + * @NETDEV_XSK_FLAGS_TX_TIMESTAMP: HW timestamping egress packets is supported + * by the driver. + * @NETDEV_XSK_FLAGS_TX_CHECKSUM: L3 checksum HW offload is supported by the + * driver. + */ +enum netdev_xsk_flags { + NETDEV_XSK_FLAGS_TX_TIMESTAMP = 1, + NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, + + /* private: */ + NETDEV_XSK_FLAGS_MASK = 3, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, NETDEV_A_DEV_XDP_FEATURES, NETDEV_A_DEV_XDP_ZC_MAX_SEGS, NETDEV_A_DEV_XDP_RX_METADATA_FEATURES, + NETDEV_A_DEV_XSK_FEATURES, __NETDEV_A_DEV_MAX, NETDEV_A_DEV_MAX = (__NETDEV_A_DEV_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index b5ffe8cd1144..6283d87dad37 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -58,6 +58,19 @@ const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) return netdev_xdp_rx_metadata_strmap[value]; } +static const char * const netdev_xsk_flags_strmap[] = { + [0] = "tx-timestamp", + [1] = "tx-checksum", +}; + +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) +{ + value = ffs(value) - 1; + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_xsk_flags_strmap)) + return NULL; + return netdev_xsk_flags_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, @@ -65,6 +78,7 @@ struct ynl_policy_attr netdev_dev_policy[NETDEV_A_DEV_MAX + 1] = { [NETDEV_A_DEV_XDP_FEATURES] = { .name = "xdp-features", .type = YNL_PT_U64, }, [NETDEV_A_DEV_XDP_ZC_MAX_SEGS] = { .name = "xdp-zc-max-segs", .type = YNL_PT_U32, }, [NETDEV_A_DEV_XDP_RX_METADATA_FEATURES] = { .name = "xdp-rx-metadata-features", .type = YNL_PT_U64, }, + [NETDEV_A_DEV_XSK_FEATURES] = { .name = "xsk-features", .type = YNL_PT_U64, }, }; struct ynl_policy_nest netdev_dev_nest = { @@ -116,6 +130,11 @@ int netdev_dev_get_rsp_parse(const struct nlmsghdr *nlh, void *data) return MNL_CB_ERROR; dst->_present.xdp_rx_metadata_features = 1; dst->xdp_rx_metadata_features = mnl_attr_get_u64(attr); + } else if (type == NETDEV_A_DEV_XSK_FEATURES) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.xsk_features = 1; + dst->xsk_features = mnl_attr_get_u64(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 4fafac879df3..39af1908444b 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -19,6 +19,7 @@ extern const struct ynl_family ynl_netdev_family; const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); +const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); /* Common nested types */ /* ============== NETDEV_CMD_DEV_GET ============== */ @@ -50,12 +51,14 @@ struct netdev_dev_get_rsp { __u32 xdp_features:1; __u32 xdp_zc_max_segs:1; __u32 xdp_rx_metadata_features:1; + __u32 xsk_features:1; } _present; __u32 ifindex; __u64 xdp_features; __u32 xdp_zc_max_segs; __u64 xdp_rx_metadata_features; + __u64 xsk_features; }; void netdev_dev_get_rsp_free(struct netdev_dev_get_rsp *rsp); -- cgit v1.3.1 From 9620e956d5b592d305728144931e89d780a598aa Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:12 -0800 Subject: xsk: Document tx_metadata_len layout - how to use - how to query features - pointers to the examples Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-7-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/networking/index.rst | 1 + Documentation/networking/xdp-rx-metadata.rst | 2 + Documentation/networking/xsk-tx-metadata.rst | 70 ++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) create mode 100644 Documentation/networking/xsk-tx-metadata.rst (limited to 'Documentation') diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 683eb42309cc..a297a894b366 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -123,6 +123,7 @@ Contents: xfrm_sync xfrm_sysctl xdp-rx-metadata + xsk-tx-metadata .. only:: subproject and html diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index 205696780b78..e3e9420fd817 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + =============== XDP RX Metadata =============== diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst new file mode 100644 index 000000000000..4f376560b23f --- /dev/null +++ b/Documentation/networking/xsk-tx-metadata.rst @@ -0,0 +1,70 @@ +================== +AF_XDP TX Metadata +================== + +This document describes how to enable offloads when transmitting packets +via :doc:`af_xdp`. Refer to :doc:`xdp-rx-metadata` on how to access similar +metadata on the receive side. + +General Design +============== + +The headroom for the metadata is reserved via ``tx_metadata_len`` in +``struct xdp_umem_reg``. The metadata length is therefore the same for +every socket that shares the same umem. The metadata layout is a fixed UAPI, +refer to ``union xsk_tx_metadata`` in ``include/uapi/linux/if_xdp.h``. +Thus, generally, the ``tx_metadata_len`` field above should contain +``sizeof(union xsk_tx_metadata)``. + +The headroom and the metadata itself should be located right before +``xdp_desc->addr`` in the umem frame. Within a frame, the metadata +layout is as follows:: + + tx_metadata_len + / \ + +-----------------+---------+----------------------------+ + | xsk_tx_metadata | padding | payload | + +-----------------+---------+----------------------------+ + ^ + | + xdp_desc->addr + +An AF_XDP application can request headrooms larger than ``sizeof(struct +xsk_tx_metadata)``. The kernel will ignore the padding (and will still +use ``xdp_desc->addr - tx_metadata_len`` to locate +the ``xsk_tx_metadata``). For the frames that shouldn't carry +any metadata (i.e., the ones that don't have ``XDP_TX_METADATA`` option), +the metadata area is ignored by the kernel as well. + +The flags field enables the particular offload: + +- ``XDP_TXMD_FLAGS_TIMESTAMP``: requests the device to put transmission + timestamp into ``tx_timestamp`` field of ``union xsk_tx_metadata``. +- ``XDP_TXMD_FLAGS_CHECKSUM``: requests the device to calculate L4 + checksum. ``csum_start`` specifies byte offset of where the checksumming + should start and ``csum_offset`` specifies byte offset where the + device should store the computed checksum. + +Besides the flags above, in order to trigger the offloads, the first +packet's ``struct xdp_desc`` descriptor should set ``XDP_TX_METADATA`` +bit in the ``options`` field. Also note that in a multi-buffer packet +only the first chunk should carry the metadata. + +Querying Device Capabilities +============================ + +Every devices exports its offloads capabilities via netlink netdev family. +Refer to ``xsk-flags`` features bitmask in +``Documentation/netlink/specs/netdev.yaml``. + +- ``tx-timestamp``: device supports ``XDP_TXMD_FLAGS_TIMESTAMP`` +- ``tx-checksum``: device supports ``XDP_TXMD_FLAGS_CHECKSUM`` + +See ``tools/net/ynl/samples/netdev.c`` on how to query this information. + +Example +======= + +See ``tools/testing/selftests/bpf/xdp_hw_metadata.c`` for an example +program that handles TX metadata. Also see https://github.com/fomichev/xskgen +for a more bare-bones example. -- cgit v1.3.1 From 11614723af26e7c32fcb704d8f30fdf60c1122dc Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 27 Nov 2023 11:03:14 -0800 Subject: xsk: Add option to calculate TX checksum in SW For XDP_COPY mode, add a UMEM option XDP_UMEM_TX_SW_CSUM to call skb_checksum_help in transmit path. Might be useful to debugging issues with real hardware. I also use this mode in the selftests. Signed-off-by: Stanislav Fomichev Link: https://lore.kernel.org/r/20231127190319.1190813-9-sdf@google.com Signed-off-by: Alexei Starovoitov --- Documentation/networking/xsk-tx-metadata.rst | 9 +++++++++ include/net/xsk_buff_pool.h | 1 + include/uapi/linux/if_xdp.h | 8 +++++++- net/xdp/xdp_umem.c | 7 ++++++- net/xdp/xsk.c | 6 ++++++ net/xdp/xsk_buff_pool.c | 1 + tools/include/uapi/linux/if_xdp.h | 8 +++++++- 7 files changed, 37 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst index 4f376560b23f..97ecfa480d00 100644 --- a/Documentation/networking/xsk-tx-metadata.rst +++ b/Documentation/networking/xsk-tx-metadata.rst @@ -50,6 +50,15 @@ packet's ``struct xdp_desc`` descriptor should set ``XDP_TX_METADATA`` bit in the ``options`` field. Also note that in a multi-buffer packet only the first chunk should carry the metadata. +Software TX Checksum +==================== + +For development and testing purposes its possible to pass +``XDP_UMEM_TX_SW_CSUM`` flag to ``XDP_UMEM_REG`` UMEM registration call. +In this case, when running in ``XDK_COPY`` mode, the TX checksum +is calculated on the CPU. Do not enable this option in production because +it will negatively affect performance. + Querying Device Capabilities ============================ diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h index 97f5cc10d79e..8d48d37ab7c0 100644 --- a/include/net/xsk_buff_pool.h +++ b/include/net/xsk_buff_pool.h @@ -83,6 +83,7 @@ struct xsk_buff_pool { bool uses_need_wakeup; bool dma_need_sync; bool unaligned; + bool tx_sw_csum; void *addrs; /* Mutual exclusion of the completion ring in the SKB mode. Two cases to protect: * NAPI TX thread and sendmsg error paths in the SKB destructor callback and when diff --git a/include/uapi/linux/if_xdp.h b/include/uapi/linux/if_xdp.h index 95de66d5a26c..d31698410410 100644 --- a/include/uapi/linux/if_xdp.h +++ b/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 946a687fb8e8..caa340134b0e 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -148,6 +148,11 @@ static int xdp_umem_account_pages(struct xdp_umem *umem) return 0; } +#define XDP_UMEM_FLAGS_VALID ( \ + XDP_UMEM_UNALIGNED_CHUNK_FLAG | \ + XDP_UMEM_TX_SW_CSUM | \ + 0) + static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) { bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; @@ -167,7 +172,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return -EINVAL; } - if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) + if (mr->flags & ~XDP_UMEM_FLAGS_VALID) return -EINVAL; if (!unaligned_chunks && !is_power_of_2(chunk_size)) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index d66ba9d6154f..281d49b4fca4 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -744,6 +744,12 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs, skb->csum_start = hr + meta->request.csum_start; skb->csum_offset = meta->request.csum_offset; skb->ip_summed = CHECKSUM_PARTIAL; + + if (unlikely(xs->pool->tx_sw_csum)) { + err = skb_checksum_help(skb); + if (err) + goto free_err; + } } } } diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 386eddcdf837..4f6f538a5462 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -86,6 +86,7 @@ struct xsk_buff_pool *xp_create_and_assign_umem(struct xdp_sock *xs, pool->umem = umem; pool->addrs = umem->addrs; pool->tx_metadata_len = umem->tx_metadata_len; + pool->tx_sw_csum = umem->flags & XDP_UMEM_TX_SW_CSUM; INIT_LIST_HEAD(&pool->free_list); INIT_LIST_HEAD(&pool->xskb_list); INIT_LIST_HEAD(&pool->xsk_tx_list); diff --git a/tools/include/uapi/linux/if_xdp.h b/tools/include/uapi/linux/if_xdp.h index d0882edc1642..638c606dfa74 100644 --- a/tools/include/uapi/linux/if_xdp.h +++ b/tools/include/uapi/linux/if_xdp.h @@ -33,7 +33,13 @@ #define XDP_USE_SG (1 << 4) /* Flags for xsk_umem_config flags */ -#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) +#define XDP_UMEM_UNALIGNED_CHUNK_FLAG (1 << 0) + +/* Force checksum calculation in software. Can be used for testing or + * working around potential HW issues. This option causes performance + * degradation and only works in XDP_COPY mode. + */ +#define XDP_UMEM_TX_SW_CSUM (1 << 1) struct sockaddr_xdp { __u16 sxdp_family; -- cgit v1.3.1 From fbb7033b76eb0639a6f24d48e59f28407a4dee64 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Nov 2023 16:43:04 +0100 Subject: dt-bindings: net: dsa: Require ports or ethernet-ports Bindings using dsa.yaml#/$defs/ethernet-ports specify that a DSA switch node need to have a ports or ethernet-ports subnode, and that is actually required, so add requirements using oneOf. Suggested-by: Rob Herring Acked-by: Florian Fainelli Reviewed-by: Krzysztof Kozlowski Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20231127-marvell-88e6152-wan-led-v9-1-272934e04681@linaro.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/dsa/dsa.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.yaml b/Documentation/devicetree/bindings/net/dsa/dsa.yaml index 6107189d276a..2abd036578d1 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.yaml +++ b/Documentation/devicetree/bindings/net/dsa/dsa.yaml @@ -46,4 +46,10 @@ $defs: $ref: dsa-port.yaml# unevaluatedProperties: false +oneOf: + - required: + - ports + - required: + - ethernet-ports + ... -- cgit v1.3.1 From a6e44f3028e7d582da625a611ef17908844d0e82 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Nov 2023 16:43:05 +0100 Subject: dt-bindings: net: mvusb: Fix up DSA example When adding a proper schema for the Marvell mx88e6xxx switch, the scripts start complaining about this embedded example: dtschema/dtc warnings/errors: net/marvell,mvusb.example.dtb: switch@0: ports: '#address-cells' is a required property from schema $id: http://devicetree.org/schemas/net/dsa/marvell,mv88e6xxx.yaml# net/marvell,mvusb.example.dtb: switch@0: ports: '#size-cells' is a required property from schema $id: http://devicetree.org/schemas/net/dsa/marvell,mv88e6xxx.yaml# Fix this up by extending the example with those properties in the ports node. While we are at it, rename "ports" to "ethernet-ports" and rename "switch" to "ethernet-switch" as this is recommended practice. Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Reviewed-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20231127-marvell-88e6152-wan-led-v9-2-272934e04681@linaro.org Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/marvell,mvusb.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml index 3a3325168048..ab838c1ffeed 100644 --- a/Documentation/devicetree/bindings/net/marvell,mvusb.yaml +++ b/Documentation/devicetree/bindings/net/marvell,mvusb.yaml @@ -50,11 +50,14 @@ examples: #address-cells = <1>; #size-cells = <0>; - switch@0 { + ethernet-switch@0 { compatible = "marvell,mv88e6190"; reg = <0x0>; - ports { + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + /* Port definitions */ }; -- cgit v1.3.1 From f45c197465ed92c72c1af7ac773ca5050bd9535a Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Nov 2023 16:43:06 +0100 Subject: dt-bindings: net: ethernet-switch: Accept special variants Accept special node naming variants for Marvell switches with special node names as ABI. This is maybe not the prettiest but it avoids special-casing the Marvell MV88E6xxx bindings by copying a lot of generic binding code down into that one binding just to special-case these unfixable nodes. Reviewed-by: Rob Herring Reviewed-by: Andrew Lunn Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231127-marvell-88e6152-wan-led-v9-3-272934e04681@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/ethernet-switch.yaml | 23 +++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ethernet-switch.yaml b/Documentation/devicetree/bindings/net/ethernet-switch.yaml index 72ac67ca3415..b3b7e1a1b127 100644 --- a/Documentation/devicetree/bindings/net/ethernet-switch.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-switch.yaml @@ -20,9 +20,26 @@ description: select: false -properties: - $nodename: - pattern: "^(ethernet-)?switch(@.*)?$" +allOf: + # This condition is here to satisfy the case where certain device + # nodes have to preserve non-standard names because of + # backward-compatibility with boot loaders inspecting certain + # node names. + - if: + properties: + compatible: + contains: + enum: + - marvell,turris-mox-mv88e6085 + - marvell,turris-mox-mv88e6190 + then: + properties: + $nodename: + pattern: "switch[0-3]@[0-3]+$" + else: + properties: + $nodename: + pattern: "^(ethernet-)?switch(@.*)?$" patternProperties: "^(ethernet-)?ports$": -- cgit v1.3.1 From 43915b2f4bb9fc0e098fa703d508027b58f442fa Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Nov 2023 16:43:07 +0100 Subject: dt-bindings: marvell: Rewrite MV88E6xxx in schema This is an attempt to rewrite the Marvell MV88E6xxx switch bindings in YAML schema. The current text binding says: WARNING: This binding is currently unstable. Do not program it into a FLASH never to be changed again. Once this binding is stable, this warning will be removed. Well that never happened before we switched to YAML markup, we can't have it like this, what about fixing the mess? Reviewed-by: Andrew Lunn Reviewed-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: Linus Walleij Reviewed-by: Vladimir Oltean Link: https://lore.kernel.org/r/20231127-marvell-88e6152-wan-led-v9-4-272934e04681@linaro.org Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/marvell,mv88e6xxx.yaml | 337 +++++++++++++++++++++ .../devicetree/bindings/net/dsa/marvell.txt | 109 ------- MAINTAINERS | 2 +- 3 files changed, 338 insertions(+), 110 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml delete mode 100644 Documentation/devicetree/bindings/net/dsa/marvell.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml b/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml new file mode 100644 index 000000000000..19f15bdd1c97 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml @@ -0,0 +1,337 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/marvell,mv88e6xxx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell MV88E6xxx DSA switch family + +maintainers: + - Andrew Lunn + +description: + The Marvell MV88E6xxx switch series has been produced and sold + by Marvell since at least 2008. The switch has a few compatibles which + just indicate the base address of the switch, then operating systems + can investigate switch ID registers to find out which actual version + of the switch it is dealing with. + +properties: + compatible: + oneOf: + - enum: + - marvell,mv88e6085 + - marvell,mv88e6190 + - marvell,mv88e6250 + description: | + marvell,mv88e6085: This switch uses base address 0x10. + This switch and its siblings will be autodetected from + ID registers found in the switch, so only "marvell,mv88e6085" should be + specified. This includes the following list of MV88Exxxx switches: + 6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165, 6171, 6172, 6175, 6176, + 6185, 6240, 6320, 6321, 6341, 6350, 6351, 6352 + marvell,mv88e6190: This switch uses base address 0x00. + This switch and its siblings will be autodetected from + ID registers found in the switch, so only "marvell,mv88e6190" should be + specified. This includes the following list of MV88Exxxx switches: + 6190, 6190X, 6191, 6290, 6361, 6390, 6390X + marvell,mv88e6250: This switch uses base address 0x08 or 0x18. + This switch and its siblings will be autodetected from + ID registers found in the switch, so only "marvell,mv88e6250" should be + specified. This includes the following list of MV88Exxxx switches: + 6220, 6250 + - items: + - const: marvell,turris-mox-mv88e6085 + - const: marvell,mv88e6085 + - items: + - const: marvell,turris-mox-mv88e6190 + - const: marvell,mv88e6190 + + reg: + maxItems: 1 + + eeprom-length: + $ref: /schemas/types.yaml#/definitions/uint32 + description: Set to the length of an EEPROM connected to the switch. Must be + set if the switch can not detect the presence and/or size of a connected + EEPROM, otherwise optional. + + reset-gpios: + description: + GPIO to be used to reset the whole device + maxItems: 1 + + interrupts: + description: The switch provides an external interrupt line, but it is + not always used by target systems. + maxItems: 1 + + interrupt-controller: + description: The switch has an internal interrupt controller used by + the different sub-blocks. + + '#interrupt-cells': + description: The internal interrupt controller only supports triggering + on active high level interrupts so the second cell must alway be set to + IRQ_TYPE_LEVEL_HIGH. + const: 2 + + mdio: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + description: Marvell MV88E6xxx switches have an varying combination of + internal and external MDIO buses, in some cases a combined bus that + can be used both internally and externally. This node is for the + primary bus, used internally and sometimes also externally. + + mdio-external: + $ref: /schemas/net/mdio.yaml# + unevaluatedProperties: false + description: Marvell MV88E6xxx switches that have a separate external + MDIO bus use this port to access external components on the MDIO bus. + + properties: + compatible: + const: marvell,mv88e6xxx-mdio-external + + required: + - compatible + +allOf: + - $ref: dsa.yaml#/$defs/ethernet-ports + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-switch@0 { + compatible = "marvell,mv88e6085"; + reg = <0>; + reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + sw_phy0: ethernet-phy@0 { + reg = <0x0>; + }; + + sw_phy1: ethernet-phy@1 { + reg = <0x1>; + }; + + sw_phy2: ethernet-phy@2 { + reg = <0x2>; + }; + + sw_phy3: ethernet-phy@3 { + reg = <0x3>; + }; + }; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-port@0 { + reg = <0>; + label = "lan4"; + phy-handle = <&sw_phy0>; + phy-mode = "internal"; + }; + + ethernet-port@1 { + reg = <1>; + label = "lan3"; + phy-handle = <&sw_phy1>; + phy-mode = "internal"; + }; + + ethernet-port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&sw_phy2>; + phy-mode = "internal"; + }; + + ethernet-port@3 { + reg = <3>; + label = "lan1"; + phy-handle = <&sw_phy3>; + phy-mode = "internal"; + }; + + ethernet-port@5 { + reg = <5>; + ethernet = <&fec>; + phy-mode = "rgmii-id"; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; + - | + #include + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-switch@0 { + compatible = "marvell,mv88e6190"; + #interrupt-cells = <2>; + interrupt-controller; + interrupt-parent = <&gpio1>; + interrupts = <7 IRQ_TYPE_LEVEL_LOW>; + pinctrl-0 = <&switch_interrupt_pins>; + pinctrl-names = "default"; + reg = <0>; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + switch0phy1: ethernet-phy@1 { + reg = <0x1>; + }; + + switch0phy2: ethernet-phy@2 { + reg = <0x2>; + }; + + switch0phy3: ethernet-phy@3 { + reg = <0x3>; + }; + + switch0phy4: ethernet-phy@4 { + reg = <0x4>; + }; + + switch0phy5: ethernet-phy@5 { + reg = <0x5>; + }; + + switch0phy6: ethernet-phy@6 { + reg = <0x6>; + }; + + switch0phy7: ethernet-phy@7 { + reg = <0x7>; + }; + + switch0phy8: ethernet-phy@8 { + reg = <0x8>; + }; + }; + + mdio-external { + compatible = "marvell,mv88e6xxx-mdio-external"; + #address-cells = <1>; + #size-cells = <0>; + + phy1: ethernet-phy@b { + reg = <0xb>; + compatible = "ethernet-phy-ieee802.3-c45"; + }; + + phy2: ethernet-phy@c { + reg = <0xc>; + compatible = "ethernet-phy-ieee802.3-c45"; + }; + }; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-port@0 { + ethernet = <ð0>; + phy-mode = "rgmii"; + reg = <0>; + + fixed-link { + full-duplex; + pause; + speed = <1000>; + }; + }; + + ethernet-port@1 { + label = "lan1"; + phy-handle = <&switch0phy1>; + reg = <1>; + }; + + ethernet-port@2 { + label = "lan2"; + phy-handle = <&switch0phy2>; + reg = <2>; + }; + + ethernet-port@3 { + label = "lan3"; + phy-handle = <&switch0phy3>; + reg = <3>; + }; + + ethernet-port@4 { + label = "lan4"; + phy-handle = <&switch0phy4>; + reg = <4>; + }; + + ethernet-port@5 { + label = "lan5"; + phy-handle = <&switch0phy5>; + reg = <5>; + }; + + ethernet-port@6 { + label = "lan6"; + phy-handle = <&switch0phy6>; + reg = <6>; + }; + + ethernet-port@7 { + label = "lan7"; + phy-handle = <&switch0phy7>; + reg = <7>; + }; + + ethernet-port@8 { + label = "lan8"; + phy-handle = <&switch0phy8>; + reg = <8>; + }; + + ethernet-port@9 { + /* 88X3310P external phy */ + label = "lan9"; + phy-handle = <&phy1>; + phy-mode = "xaui"; + reg = <9>; + }; + + ethernet-port@a { + /* 88X3310P external phy */ + label = "lan10"; + phy-handle = <&phy2>; + phy-mode = "xaui"; + reg = <0xa>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/marvell.txt b/Documentation/devicetree/bindings/net/dsa/marvell.txt deleted file mode 100644 index 6ec0c181b6db..000000000000 --- a/Documentation/devicetree/bindings/net/dsa/marvell.txt +++ /dev/null @@ -1,109 +0,0 @@ -Marvell DSA Switch Device Tree Bindings ---------------------------------------- - -WARNING: This binding is currently unstable. Do not program it into a -FLASH never to be changed again. Once this binding is stable, this -warning will be removed. - -If you need a stable binding, use the old dsa.txt binding. - -Marvell Switches are MDIO devices. The following properties should be -placed as a child node of an mdio device. - -The properties described here are those specific to Marvell devices. -Additional required and optional properties can be found in dsa.txt. - -The compatibility string is used only to find an identification register, -which is at a different MDIO base address in different switch families. -- "marvell,mv88e6085" : Switch has base address 0x10. Use with models: - 6085, 6095, 6097, 6123, 6131, 6141, 6161, 6165, - 6171, 6172, 6175, 6176, 6185, 6240, 6320, 6321, - 6341, 6350, 6351, 6352 -- "marvell,mv88e6190" : Switch has base address 0x00. Use with models: - 6190, 6190X, 6191, 6290, 6361, 6390, 6390X -- "marvell,mv88e6250" : Switch has base address 0x08 or 0x18. Use with model: - 6220, 6250 - -Required properties: -- compatible : Should be one of "marvell,mv88e6085", - "marvell,mv88e6190" or "marvell,mv88e6250" as - indicated above -- reg : Address on the MII bus for the switch. - -Optional properties: - -- reset-gpios : Should be a gpio specifier for a reset line -- interrupts : Interrupt from the switch -- interrupt-controller : Indicates the switch is itself an interrupt - controller. This is used for the PHY interrupts. -#interrupt-cells = <2> : Controller uses two cells, number and flag -- eeprom-length : Set to the length of an EEPROM connected to the - switch. Must be set if the switch can not detect - the presence and/or size of a connected EEPROM, - otherwise optional. -- mdio : Container of PHY and devices on the switches MDIO - bus. -- mdio? : Container of PHYs and devices on the external MDIO - bus. The node must contains a compatible string of - "marvell,mv88e6xxx-mdio-external" - -Example: - - mdio { - #address-cells = <1>; - #size-cells = <0>; - interrupt-parent = <&gpio0>; - interrupts = <27 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; - #interrupt-cells = <2>; - - switch0: switch@0 { - compatible = "marvell,mv88e6085"; - reg = <0>; - reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; - }; - }; - }; - }; - - mdio { - #address-cells = <1>; - #size-cells = <0>; - interrupt-parent = <&gpio0>; - interrupts = <27 IRQ_TYPE_LEVEL_LOW>; - interrupt-controller; - #interrupt-cells = <2>; - - switch0: switch@0 { - compatible = "marvell,mv88e6190"; - reg = <0>; - reset-gpios = <&gpio5 1 GPIO_ACTIVE_LOW>; - - mdio { - #address-cells = <1>; - #size-cells = <0>; - switch1phy0: switch1phy0@0 { - reg = <0>; - interrupt-parent = <&switch0>; - interrupts = <0 IRQ_TYPE_LEVEL_HIGH>; - }; - }; - - mdio1 { - compatible = "marvell,mv88e6xxx-mdio-external"; - #address-cells = <1>; - #size-cells = <0>; - switch1phy9: switch1phy0@9 { - reg = <9>; - }; - }; - }; - }; diff --git a/MAINTAINERS b/MAINTAINERS index 4b1c2077e959..f3bb6688d1f5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12773,7 +12773,7 @@ MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER M: Andrew Lunn L: netdev@vger.kernel.org S: Maintained -F: Documentation/devicetree/bindings/net/dsa/marvell.txt +F: Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml F: Documentation/networking/devlink/mv88e6xxx.rst F: drivers/net/dsa/mv88e6xxx/ F: include/linux/dsa/mv88e6xxx.h -- cgit v1.3.1 From 017ca9c9f31051bf6118e8557c78fe6471cca6fc Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Mon, 27 Nov 2023 16:43:08 +0100 Subject: dt-bindings: marvell: Add Marvell MV88E6060 DSA schema The Marvell MV88E6060 is one of the oldest DSA switches from Marvell, and it has DT bindings used in the wild. Let's define them properly. It is different enough from the rest of the MV88E6xxx switches that it deserves its own binding. Reviewed-by: Andrew Lunn Reviewed-by: Vladimir Oltean Reviewed-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20231127-marvell-88e6152-wan-led-v9-5-272934e04681@linaro.org Signed-off-by: Jakub Kicinski --- .../bindings/net/dsa/marvell,mv88e6060.yaml | 88 ++++++++++++++++++++++ MAINTAINERS | 1 + 2 files changed, 89 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml b/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml new file mode 100644 index 000000000000..4f1adf00431a --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml @@ -0,0 +1,88 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/marvell,mv88e6060.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Marvell MV88E6060 DSA switch + +maintainers: + - Andrew Lunn + +description: + The Marvell MV88E6060 switch has been produced and sold by Marvell + since at least 2008. The switch has one pin ADDR4 that controls the + MDIO address of the switch to be 0x10 or 0x00, and on the MDIO bus + connected to the switch, the PHYs inside the switch appear as + independent devices on address 0x00-0x04 or 0x10-0x14, so in difference + from many other DSA switches this switch does not have an internal + MDIO bus for the PHY devices. + +properties: + compatible: + const: marvell,mv88e6060 + description: + The MV88E6060 is the oldest Marvell DSA switch product, and + as such a bit limited in features compared to later hardware. + + reg: + maxItems: 1 + + reset-gpios: + description: + GPIO to be used to reset the whole device + maxItems: 1 + +allOf: + - $ref: dsa.yaml#/$defs/ethernet-ports + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + #include + #include + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-switch@16 { + compatible = "marvell,mv88e6060"; + reg = <16>; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-port@0 { + reg = <0>; + label = "lan1"; + }; + ethernet-port@1 { + reg = <1>; + label = "lan2"; + }; + ethernet-port@2 { + reg = <2>; + label = "lan3"; + }; + ethernet-port@3 { + reg = <3>; + label = "lan4"; + }; + ethernet-port@5 { + reg = <5>; + phy-mode = "rev-mii"; + ethernet = <ðc>; + fixed-link { + speed = <100>; + full-duplex; + }; + }; + }; + }; + }; diff --git a/MAINTAINERS b/MAINTAINERS index f3bb6688d1f5..dcdc7e86ca9c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12773,6 +12773,7 @@ MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER M: Andrew Lunn L: netdev@vger.kernel.org S: Maintained +F: Documentation/devicetree/bindings/net/dsa/marvell,mv88e6060.yaml F: Documentation/devicetree/bindings/net/dsa/marvell,mv88e6xxx.yaml F: Documentation/networking/devlink/mv88e6xxx.rst F: drivers/net/dsa/mv88e6xxx/ -- cgit v1.3.1 From 15d74e6588a158e481f38858de34011034957152 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 28 Nov 2023 12:52:54 +0100 Subject: Documentation: devlink: extend reload-reinit description Be more explicit about devlink entities that may stay and that have to be removed during reload reinit action. Signed-off-by: Jiri Pirko Reviewed-by: Przemek Kitszel Signed-off-by: Paolo Abeni --- Documentation/networking/devlink/devlink-reload.rst | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/devlink-reload.rst b/Documentation/networking/devlink/devlink-reload.rst index 505d22da027d..2fb0269b2054 100644 --- a/Documentation/networking/devlink/devlink-reload.rst +++ b/Documentation/networking/devlink/devlink-reload.rst @@ -22,8 +22,17 @@ By default ``driver_reinit`` action is selected. * - ``driver-reinit`` - Devlink driver entities re-initialization, including applying new values to devlink entities which are used during driver - load such as ``devlink-params`` in configuration mode - ``driverinit`` or ``devlink-resources`` + load which are: + + * ``devlink-params`` in configuration mode ``driverinit`` + * ``devlink-resources`` + + Other devlink entities may stay over the re-initialization: + + * ``devlink-health-reporter`` + * ``devlink-region`` + + The rest of the devlink entities have to be removed and readded. * - ``fw_activate`` - Firmware activate. Activates new firmware if such image is stored and pending activation. If no limitation specified this action may involve -- cgit v1.3.1 From 068b2b649fc1fa340b53114350457d40b31aeedb Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Tue, 28 Nov 2023 20:53:48 -0800 Subject: octeon_ep: support OCTEON CN98 devices Add PCI Endpoint NIC support for Octeon CN98 devices. CN98 devices are part of Octeon 9 family products with similar PCI NIC characteristics to CN93, already supported driver. Add CN98 card to the device id table, as well as support differences in the register fields and certain usage scenarios such as unload. Signed-off-by: Shinas Rasheed Link: https://lore.kernel.org/r/20231129045348.2538843-3-srasheed@marvell.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/marvell/octeon_ep.rst | 1 + .../net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c | 24 ++++++++++++++++++---- .../net/ethernet/marvell/octeon_ep/octep_main.c | 4 ++++ .../net/ethernet/marvell/octeon_ep/octep_main.h | 1 + .../marvell/octeon_ep/octep_regs_cn9k_pf.h | 4 ++++ 5 files changed, 30 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst index 613a818d5db6..c96d262b30be 100644 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep.rst @@ -22,6 +22,7 @@ EndPoint NIC. Supported Devices ================= Currently, this driver support following devices: + * Network controller: Cavium, Inc. Device b100 * Network controller: Cavium, Inc. Device b200 * Network controller: Cavium, Inc. Device b400 * Network controller: Cavium, Inc. Device b900 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c index d4ee2454675b..8baabd07e91f 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_cn9k_pf.c @@ -216,9 +216,15 @@ static void octep_init_config_cn93_pf(struct octep_device *oct) conf->sriov_cfg.vf_srn = CN93_SDP_EPF_RINFO_SRN(val); val = octep_read_csr64(oct, CN93_SDP_MAC_PF_RING_CTL(oct->pcie_port)); - conf->pf_ring_cfg.srn = CN93_SDP_MAC_PF_RING_CTL_SRN(val); - conf->pf_ring_cfg.max_io_rings = CN93_SDP_MAC_PF_RING_CTL_RPPF(val); - conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + if (oct->chip_id == OCTEP_PCI_DEVICE_ID_CN98_PF) { + conf->pf_ring_cfg.srn = CN98_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CN98_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + } else { + conf->pf_ring_cfg.srn = CN93_SDP_MAC_PF_RING_CTL_SRN(val); + conf->pf_ring_cfg.max_io_rings = CN93_SDP_MAC_PF_RING_CTL_RPPF(val); + conf->pf_ring_cfg.active_io_rings = conf->pf_ring_cfg.max_io_rings; + } dev_info(&pdev->dev, "pf_srn=%u rpvf=%u nvfs=%u rppf=%u\n", conf->pf_ring_cfg.srn, conf->sriov_cfg.active_rings_per_vf, conf->sriov_cfg.active_vfs, conf->pf_ring_cfg.active_io_rings); @@ -578,6 +584,13 @@ static irqreturn_t octep_ioq_intr_handler_cn93_pf(void *data) return IRQ_HANDLED; } +/* soft reset of 98xx */ +static int octep_soft_reset_cn98_pf(struct octep_device *oct) +{ + dev_info(&oct->pdev->dev, "CN98XX: skip soft reset\n"); + return 0; +} + /* soft reset of 93xx */ static int octep_soft_reset_cn93_pf(struct octep_device *oct) { @@ -806,7 +819,10 @@ void octep_device_setup_cn93_pf(struct octep_device *oct) oct->hw_ops.misc_intr_handler = octep_misc_intr_handler_cn93_pf; oct->hw_ops.rsvd_intr_handler = octep_rsvd_intr_handler_cn93_pf; oct->hw_ops.ioq_intr_handler = octep_ioq_intr_handler_cn93_pf; - oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf; + if (oct->chip_id == OCTEP_PCI_DEVICE_ID_CN98_PF) + oct->hw_ops.soft_reset = octep_soft_reset_cn98_pf; + else + oct->hw_ops.soft_reset = octep_soft_reset_cn93_pf; oct->hw_ops.reinit_regs = octep_reinit_regs_cn93_pf; oct->hw_ops.enable_interrupts = octep_enable_interrupts_cn93_pf; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 423eec5ff3ad..1a24b3d3cce6 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -22,6 +22,7 @@ struct workqueue_struct *octep_wq; /* Supported Devices */ static const struct pci_device_id octep_pci_id_tbl[] = { + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN98_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_PF)}, {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_PF)}, @@ -1147,6 +1148,8 @@ static void octep_ctrl_mbox_task(struct work_struct *work) static const char *octep_devid_to_str(struct octep_device *oct) { switch (oct->chip_id) { + case OCTEP_PCI_DEVICE_ID_CN98_PF: + return "CN98XX"; case OCTEP_PCI_DEVICE_ID_CN93_PF: return "CN93XX"; case OCTEP_PCI_DEVICE_ID_CNF95N_PF: @@ -1197,6 +1200,7 @@ int octep_device_setup(struct octep_device *oct) dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device); switch (oct->chip_id) { + case OCTEP_PCI_DEVICE_ID_CN98_PF: case OCTEP_PCI_DEVICE_ID_CN93_PF: case OCTEP_PCI_DEVICE_ID_CNF95N_PF: dev_info(&pdev->dev, "Setting up OCTEON %s PF PASS%d.%d\n", diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h index e2fe8b28eb0e..e1b4b2af618e 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.h @@ -18,6 +18,7 @@ #define OCTEP_PCIID_CN93_PF 0xB200177d #define OCTEP_PCIID_CN93_VF 0xB203177d +#define OCTEP_PCI_DEVICE_ID_CN98_PF 0xB100 #define OCTEP_PCI_DEVICE_ID_CN93_PF 0xB200 #define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203 diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h index 0a43983e9101..2e20a39d89af 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_regs_cn9k_pf.h @@ -362,6 +362,10 @@ #define CN93_SDP_MAC_PF_RING_CTL_SRN(val) (((val) >> 8) & 0xFF) #define CN93_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 16) & 0x3F) +#define CN98_SDP_MAC_PF_RING_CTL_NPFS(val) (((val) >> 48) & 0xF) +#define CN98_SDP_MAC_PF_RING_CTL_SRN(val) ((val) & 0xFF) +#define CN98_SDP_MAC_PF_RING_CTL_RPPF(val) (((val) >> 32) & 0x3F) + /* Number of non-queue interrupts in CN93xx */ #define CN93_NUM_NON_IOQ_INTR 16 -- cgit v1.3.1 From e8c780a5706066eba210e4c514968d95ba29e052 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 28 Nov 2023 20:14:27 -0800 Subject: docs: netlink: link to family documentations from spec info To increase the chances of people finding the rendered docs add a link to specs.rst and index.rst. Add a label in the generated index.rst and while at it adjust the title a little bit. Reviewed-by: Breno Leitao Reviewed-by: Donald Hunter Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231129041427.2763074-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/index.rst | 4 +++- Documentation/userspace-api/netlink/specs.rst | 2 +- tools/net/ynl/ynl-gen-rst.py | 8 +++++++- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/userspace-api/netlink/index.rst b/Documentation/userspace-api/netlink/index.rst index 62725dafbbdb..c1b6765cc963 100644 --- a/Documentation/userspace-api/netlink/index.rst +++ b/Documentation/userspace-api/netlink/index.rst @@ -16,4 +16,6 @@ Netlink documentation for users. genetlink-legacy netlink-raw -See also :ref:`Documentation/core-api/netlink.rst `. +See also: + - :ref:`Documentation/core-api/netlink.rst ` + - :ref:`Documentation/networking/netlink_spec/index.rst ` diff --git a/Documentation/userspace-api/netlink/specs.rst b/Documentation/userspace-api/netlink/specs.rst index c1b951649113..1b50d97d8d7c 100644 --- a/Documentation/userspace-api/netlink/specs.rst +++ b/Documentation/userspace-api/netlink/specs.rst @@ -15,7 +15,7 @@ kernel headers directly. Internally kernel uses the YAML specs to generate: - the C uAPI header - - documentation of the protocol as a ReST file + - documentation of the protocol as a ReST file - see :ref:`Documentation/networking/netlink_spec/index.rst ` - policy tables for input attribute validation - operation tables diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index b6292109e236..8c62e040df5d 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -122,6 +122,11 @@ def rst_toctree(maxdepth: int = 2) -> str: return "\n".join(lines) +def rst_label(title: str) -> str: + """Return a formatted label""" + return f".. _{title}:\n\n" + + # Parsers # ======= @@ -349,7 +354,8 @@ def generate_main_index_rst(output: str) -> None: lines = [] lines.append(rst_header()) - lines.append(rst_title("Netlink Specification")) + lines.append(rst_label("specs")) + lines.append(rst_title("Netlink Family Specifications")) lines.append(rst_toctree(1)) index_dir = os.path.dirname(output) -- cgit v1.3.1 From 0de267d9ec6574536ec5ea2f2242df5c92bcdd4b Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 29 Nov 2023 15:44:14 -0800 Subject: Documentation/bpf: Add documentation for filesystem kfuncs Add a brief introduction for file system kfuncs: bpf_get_file_xattr() bpf_get_fsverity_digest() The documentation highlights the strategy to avoid recursions of these kfuncs. Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20231129234417.856536-4-song@kernel.org Signed-off-by: Alexei Starovoitov --- Documentation/bpf/fs_kfuncs.rst | 21 +++++++++++++++++++++ Documentation/bpf/index.rst | 1 + 2 files changed, 22 insertions(+) create mode 100644 Documentation/bpf/fs_kfuncs.rst (limited to 'Documentation') diff --git a/Documentation/bpf/fs_kfuncs.rst b/Documentation/bpf/fs_kfuncs.rst new file mode 100644 index 000000000000..8762c3233a3d --- /dev/null +++ b/Documentation/bpf/fs_kfuncs.rst @@ -0,0 +1,21 @@ +.. SPDX-License-Identifier: GPL-2.0 + +.. _fs_kfuncs-header-label: + +===================== +BPF filesystem kfuncs +===================== + +BPF LSM programs need to access filesystem data from LSM hooks. The following +BPF kfuncs can be used to get these data. + + * ``bpf_get_file_xattr()`` + + * ``bpf_get_fsverity_digest()`` + +To avoid recursions, these kfuncs follow the following rules: + +1. These kfuncs are only permitted from BPF LSM function. +2. These kfuncs should not call into other LSM hooks, i.e. security_*(). For + example, ``bpf_get_file_xattr()`` does not use ``vfs_getxattr()``, because + the latter calls LSM hook ``security_inode_getxattr``. diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index aeaeb35e6d4a..0bb5cb8157f1 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -21,6 +21,7 @@ that goes into great technical depth about the BPF Architecture. helpers kfuncs cpumasks + fs_kfuncs programs maps bpf_prog_run -- cgit v1.3.1 From 527d2cd8b852e06c25c129fe400832bf88d2b5b0 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Thu, 30 Nov 2023 21:49:53 +0000 Subject: doc/netlink: Add bitfield32, s8, s16 to the netlink-raw schema The netlink-raw schema was not updated when bitfield32 was added to the genetlink-legacy schema. It is needed for rtnetlink families. s8 and s16 were also missing. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231130214959.27377-2-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/netlink-raw.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index 775cce8c548a..ad5395040765 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -200,7 +200,8 @@ properties: type: string type: &attr-type description: The netlink attribute type - enum: [ unused, pad, flag, binary, u8, u16, u32, u64, s32, s64, + enum: [ unused, pad, flag, binary, bitfield32, + u8, u16, u32, u64, s8, s16, s32, s64, string, nest, array-nest, nest-type-value ] doc: description: Documentation of the attribute. -- cgit v1.3.1 From cc124ad39288ef8366d1f731567115d077e612a1 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 30 Nov 2023 20:13:59 +0100 Subject: Documentation: networking: add missing PLCA messages from the message list Physical Layer Collision Avoidance messages are correctly documented but were left-out of the global list of ethnl messages, add them to the list. Signed-off-by: Maxime Chevallier Link: https://lore.kernel.org/r/20231130191400.817948-1-maxime.chevallier@bootlin.com Signed-off-by: Jakub Kicinski --- Documentation/networking/ethtool-netlink.rst | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 2540c70952ff..6a49624a9cbf 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -223,6 +223,9 @@ Userspace to kernel: ``ETHTOOL_MSG_PSE_SET`` set PSE parameters ``ETHTOOL_MSG_PSE_GET`` get PSE parameters ``ETHTOOL_MSG_RSS_GET`` get RSS settings + ``ETHTOOL_MSG_PLCA_GET_CFG`` get PLCA RS parameters + ``ETHTOOL_MSG_PLCA_SET_CFG`` set PLCA RS parameters + ``ETHTOOL_MSG_PLCA_GET_STATUS`` get PLCA RS status ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters ===================================== ================================= @@ -267,6 +270,9 @@ Kernel to userspace: ``ETHTOOL_MSG_MODULE_GET_REPLY`` transceiver module parameters ``ETHTOOL_MSG_PSE_GET_REPLY`` PSE parameters ``ETHTOOL_MSG_RSS_GET_REPLY`` RSS settings + ``ETHTOOL_MSG_PLCA_GET_CFG_REPLY`` PLCA RS parameters + ``ETHTOOL_MSG_PLCA_GET_STATUS_REPLY`` PLCA RS status + ``ETHTOOL_MSG_PLCA_NTF`` PLCA RS parameters ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status ======================================== ================================= -- cgit v1.3.1 From a10859384256cdfab37601239ec1640a982a67e2 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 29 Nov 2023 18:22:58 +0100 Subject: dt-bindings: net: qcom,ipa: document SM8650 compatible Document the IPA on the SM8650 Platform which uses version 5.5.1, which is a minor revision of v5.5 found on SM8550, thus we can use the SM8550 bindings as fallback since it shares the same register mappings. Signed-off-by: Neil Armstrong Reviewed-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231129-topic-sm8650-upstream-bindings-ipa-v1-1-ca21eb2dfb14@linaro.org Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/qcom,ipa.yaml | 25 +++++++++++++--------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 702eadccdf99..c30218684cfe 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -43,16 +43,21 @@ description: properties: compatible: - enum: - - qcom,msm8998-ipa - - qcom,sc7180-ipa - - qcom,sc7280-ipa - - qcom,sdm845-ipa - - qcom,sdx55-ipa - - qcom,sdx65-ipa - - qcom,sm6350-ipa - - qcom,sm8350-ipa - - qcom,sm8550-ipa + oneOf: + - enum: + - qcom,msm8998-ipa + - qcom,sc7180-ipa + - qcom,sc7280-ipa + - qcom,sdm845-ipa + - qcom,sdx55-ipa + - qcom,sdx65-ipa + - qcom,sm6350-ipa + - qcom,sm8350-ipa + - qcom,sm8550-ipa + - items: + - enum: + - qcom,sm8650-ipa + - const: qcom,sm8550-ipa reg: items: -- cgit v1.3.1 From 14006f1d8fa24a2320781ad503ca1cba92e940d2 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Wed, 29 Nov 2023 07:27:52 +0000 Subject: Documentations: Analyze heavily used Networking related structs Analyzed a few structs in the networking stack by looking at variables within them that are used in the TCP/IP fast path. Fast path is defined as TCP path where data is transferred from sender to receiver unidirectionally. It doesn't include phases other than TCP_ESTABLISHED, nor does it look at error paths. We hope to re-organizing variables that span many cachelines whose fast path variables are also spread out, and this document can help future developers keep networking fast path cachelines small. Optimized_cacheline field is computed as (Fastpath_Bytes/L3_cacheline_size_x86), and not the actual organized results (see patches to come for these). Investigation is done on 6.5 Name Struct_Cachelines Cur_fastpath_cache Fastpath_Bytes Optimized_cacheline tcp_sock 42 (2664 Bytes) 12 396 8 net_device 39 (2240 bytes) 12 234 4 inet_sock 15 (960 bytes) 14 922 14 Inet_connection_sock 22 (1368 bytes) 18 1166 18 Netns_ipv4 (sysctls) 12 (768 bytes) 4 77 2 linux_mib 16 (1060) 6 104 2 Note how there isn't much improvement space for inet_sock and Inet_connection_sock because sk and icsk_inet respectively takes up so much of the struct that rest of the variables become a small portion of the struct size. So, we decided to reorganize tcp_sock, net_device, netns_ipv4 Suggested-by: Eric Dumazet Signed-off-by: Coco Li Reviewed-by: Eric Dumazet Reviewed-by: Shakeel Butt Signed-off-by: David S. Miller --- Documentation/networking/index.rst | 1 + Documentation/networking/net_cachelines/index.rst | 15 ++ .../net_cachelines/inet_connection_sock.rst | 49 ++++++ .../networking/net_cachelines/inet_sock.rst | 43 +++++ .../networking/net_cachelines/net_device.rst | 177 +++++++++++++++++++++ .../net_cachelines/netns_ipv4_sysctl.rst | 157 ++++++++++++++++++ Documentation/networking/net_cachelines/snmp.rst | 134 ++++++++++++++++ .../networking/net_cachelines/tcp_sock.rst | 156 ++++++++++++++++++ MAINTAINERS | 3 + 9 files changed, 735 insertions(+) create mode 100644 Documentation/networking/net_cachelines/index.rst create mode 100644 Documentation/networking/net_cachelines/inet_connection_sock.rst create mode 100644 Documentation/networking/net_cachelines/inet_sock.rst create mode 100644 Documentation/networking/net_cachelines/net_device.rst create mode 100644 Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst create mode 100644 Documentation/networking/net_cachelines/snmp.rst create mode 100644 Documentation/networking/net_cachelines/tcp_sock.rst (limited to 'Documentation') diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d9424dcb19bf..69f3d6dcd9fd 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -75,6 +75,7 @@ Contents: mptcp-sysctl multiqueue napi + net_cachelines/index netconsole netdev-features netdevices diff --git a/Documentation/networking/net_cachelines/index.rst b/Documentation/networking/net_cachelines/index.rst new file mode 100644 index 000000000000..6f1a99989bbd --- /dev/null +++ b/Documentation/networking/net_cachelines/index.rst @@ -0,0 +1,15 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=================================== +Common Networking Struct Cachelines +=================================== + +.. toctree:: + :maxdepth: 1 + + inet_connection_sock + inet_sock + net_device + netns_ipv4_sysctl + snmp + tcp_sock diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst new file mode 100644 index 000000000000..ad2b200147a6 --- /dev/null +++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst @@ -0,0 +1,49 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +===================================================== +inet_connection_sock struct fast path usage breakdown +===================================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..inet_connection_sock +struct_inet_sock icsk_inet read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data +struct_request_sock_queue icsk_accept_queue - - +struct_inet_bind_bucket icsk_bind_hash read_mostly - tcp_set_state +struct_inet_bind2_bucket icsk_bind2_hash read_mostly - tcp_set_state,inet_put_port +unsigned_long icsk_timeout read_mostly - inet_csk_reset_xmit_timer,tcp_connect +struct_timer_list icsk_retransmit_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect +struct_timer_list icsk_delack_timer read_mostly - inet_csk_reset_xmit_timer,tcp_connect +u32 icsk_rto read_write - tcp_cwnd_validate,tcp_schedule_loss_probe,tcp_connect_init,tcp_connect,tcp_write_xmit,tcp_push_one +u32 icsk_rto_min - - +u32 icsk_delack_max - - +u32 icsk_pmtu_cookie read_write - tcp_sync_mss,tcp_current_mss,tcp_send_syn_data,tcp_connect_init,tcp_connect +struct_tcp_congestion_ops icsk_ca_ops read_write - tcp_cwnd_validate,tcp_tso_segs,tcp_ca_dst_init,tcp_connect_init,tcp_connect,tcp_write_xmit +struct_inet_connection_sock_af_ops icsk_af_ops read_mostly - tcp_finish_connect,tcp_send_syn_data,tcp_mtup_init,tcp_mtu_check_reprobe,tcp_mtu_probe,tcp_connect_init,tcp_connect,__tcp_transmit_skb +struct_tcp_ulp_ops* icsk_ulp_ops - - +void* icsk_ulp_data - - +u8:5 icsk_ca_state read_write - tcp_cwnd_application_limited,tcp_set_ca_state,tcp_enter_cwr,tcp_tso_should_defer,tcp_mtu_probe,tcp_schedule_loss_probe,tcp_write_xmit,__tcp_transmit_skb +u8:1 icsk_ca_initialized read_write - tcp_init_transfer,tcp_init_congestion_control,tcp_init_transfer,tcp_finish_connect,tcp_connect +u8:1 icsk_ca_setsockopt - - +u8:1 icsk_ca_dst_locked write_mostly - tcp_ca_dst_init,tcp_connect_init,tcp_connect +u8 icsk_retransmits write_mostly - tcp_connect_init,tcp_connect +u8 icsk_pending read_write - inet_csk_reset_xmit_timer,tcp_connect,tcp_check_probe_timer,__tcp_push_pending_frames,tcp_rearm_rto,tcp_event_new_data_sent,tcp_event_new_data_sent +u8 icsk_backoff write_mostly - tcp_write_queue_purge,tcp_connect_init +u8 icsk_syn_retries - - +u8 icsk_probes_out - - +u16 icsk_ext_hdr_len read_mostly - __tcp_mtu_to_mss,tcp_mtu_to_rss,tcp_mtu_probe,tcp_write_xmit,tcp_mtu_to_mss, +struct_icsk_ack_u8 pending read_write read_write inet_csk_ack_scheduled,__tcp_cleanup_rbuf,tcp_cleanup_rbuf,inet_csk_clear_xmit_timer,tcp_event_ack-sent,inet_csk_reset_xmit_timer +struct_icsk_ack_u8 quick read_write write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_select_window,__tcp_cleanup_rbuf +struct_icsk_ack_u8 pingpong - - +struct_icsk_ack_u8 retry write_mostly read_write inet_csk_clear_xmit_timer,tcp_rearm_rto,tcp_event_new_data_sent,tcp_write_xmit,__tcp_send_ack,tcp_send_ack, +struct_icsk_ack_u8 ato read_mostly write_mostly tcp_dec_quickack_mode,tcp_event_ack_sent,__tcp_transmit_skb,__tcp_send_ack,tcp_send_ack +struct_icsk_ack_unsigned_long timeout read_write read_write inet_csk_reset_xmit_timer,tcp_connect +struct_icsk_ack_u32 lrcvtime read_write - tcp_finish_connect,tcp_connect,tcp_event_data_sent,__tcp_transmit_skb +struct_icsk_ack_u16 rcv_mss write_mostly read_mostly __tcp_select_window,__tcp_cleanup_rbuf,tcp_initialize_rcv_mss,tcp_connect_init +struct_icsk_mtup_int search_high read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_check_reprobe,tcp_write_xmit +struct_icsk_mtup_int search_low read_write - tcp_mtu_probe,tcp_mtu_check_reprobe,tcp_write_xmit,tcp_sync_mss,tcp_connect_init,tcp_mtup_init +struct_icsk_mtup_u32:31 probe_size read_write - tcp_mtup_init,tcp_connect_init,__tcp_transmit_skb +struct_icsk_mtup_u32:1 enabled read_write - tcp_mtup_init,tcp_sync_mss,tcp_connect_init,tcp_mtu_probe,tcp_write_xmit +struct_icsk_mtup_u32 probe_timestamp read_write - tcp_mtup_init,tcp_connect_init,tcp_mtu_check_reprobe,tcp_mtu_probe +u32 icsk_probes_tstamp - - +u32 icsk_user_timeout - - +u64[104/sizeof(u64)] icsk_ca_priv - - diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst new file mode 100644 index 000000000000..4077febdb995 --- /dev/null +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -0,0 +1,43 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +===================================================== +inet_connection_sock struct fast path usage breakdown +===================================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..inet_sock +struct_sock sk read_mostly read_mostly tcp_init_buffer_space,tcp_init_transfer,tcp_finish_connect,tcp_connect,tcp_send_rcvq,tcp_send_syn_data +struct_ipv6_pinfo* pinet6 - - +be16 inet_sport read_mostly - __tcp_transmit_skb +be32 inet_daddr read_mostly - ip_select_ident_segs +be32 inet_rcv_saddr - - +be16 inet_dport read_mostly - __tcp_transmit_skb +u16 inet_num - - +be32 inet_saddr - - +s16 uc_ttl read_mostly - __ip_queue_xmit/ip_select_ttl +u16 cmsg_flags - - +struct_ip_options_rcu* inet_opt read_mostly - __ip_queue_xmit +u16 inet_id read_mostly - ip_select_ident_segs +u8 tos read_mostly - ip_queue_xmit +u8 min_ttl - - +u8 mc_ttl - - +u8 pmtudisc - - +u8:1 recverr - - +u8:1 is_icsk - - +u8:1 freebind - - +u8:1 hdrincl - - +u8:1 mc_loop - - +u8:1 transparent - - +u8:1 mc_all - - +u8:1 nodefrag - - +u8:1 bind_address_no_port - - +u8:1 recverr_rfc4884 - - +u8:1 defer_connect read_mostly - tcp_sendmsg_fastopen +u8 rcv_tos - - +u8 convert_csum - - +int uc_index - - +int mc_index - - +be32 mc_addr - - +struct_ip_mc_socklist* mc_list - - +struct_inet_cork_full cork read_mostly - __tcp_transmit_skb +struct local_port_range - - diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst new file mode 100644 index 000000000000..98508aa4f800 --- /dev/null +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -0,0 +1,177 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=========================================== +net_device struct fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access Comments +..struct ..net_device +char name[16] - - +struct_netdev_name_node* name_node +struct_dev_ifalias* ifalias +unsigned_long mem_end +unsigned_long mem_start +unsigned_long base_addr +unsigned_long state +struct_list_head dev_list +struct_list_head napi_list +struct_list_head unreg_list +struct_list_head close_list +struct_list_head ptype_all read_mostly - dev_nit_active(tx) +struct_list_head ptype_specific read_mostly deliver_ptype_list_skb/__netif_receive_skb_core(rx) +struct adj_list +unsigned_int flags read_mostly read_mostly __dev_queue_xmit,__dev_xmit_skb,ip6_output,__ip6_finish_output(tx);ip6_rcv_core(rx) +xdp_features_t xdp_features +unsigned_long_long priv_flags read_mostly - __dev_queue_xmit(tx) +struct_net_device_ops* netdev_ops read_mostly - netdev_core_pick_tx,netdev_start_xmit(tx) +struct_xdp_metadata_ops* xdp_metadata_ops +int ifindex - read_mostly ip6_rcv_core +unsigned_short gflags +unsigned_short hard_header_len read_mostly read_mostly ip6_xmit(tx);gro_list_prepare(rx) +unsigned_int mtu read_mostly - ip_finish_output2 +unsigned_short needed_headroom read_mostly - LL_RESERVED_SPACE/ip_finish_output2 +unsigned_short needed_tailroom +netdev_features_t features read_mostly read_mostly HARD_TX_LOCK,netif_skb_features,sk_setup_caps(tx);netif_elide_gro(rx) +netdev_features_t hw_features +netdev_features_t wanted_features +netdev_features_t vlan_features +netdev_features_t hw_enc_features - - netif_skb_features +netdev_features_t mpls_features +netdev_features_t gso_partial_features +unsigned_int min_mtu +unsigned_int max_mtu +unsigned_short type +unsigned_char min_header_len +unsigned_char name_assign_type +int group +struct_net_device_stats stats +struct_net_device_core_stats* core_stats +atomic_t carrier_up_count +atomic_t carrier_down_count +struct_iw_handler_def* wireless_handlers +struct_iw_public_data* wireless_data +struct_ethtool_ops* ethtool_ops +struct_l3mdev_ops* l3mdev_ops +struct_ndisc_ops* ndisc_ops +struct_xfrmdev_ops* xfrmdev_ops +struct_tlsdev_ops* tlsdev_ops +struct_header_ops* header_ops read_mostly - ip_finish_output2,ip6_finish_output2(tx) +unsigned_char operstate +unsigned_char link_mode +unsigned_char if_port +unsigned_char dma +unsigned_char perm_addr[32] +unsigned_char addr_assign_type +unsigned_char addr_len +unsigned_char upper_level +unsigned_char lower_level +unsigned_short neigh_priv_len +unsigned_short padded +unsigned_short dev_id +unsigned_short dev_port +spinlock_t addr_list_lock +int irq +struct_netdev_hw_addr_list uc +struct_netdev_hw_addr_list mc +struct_netdev_hw_addr_list dev_addrs +struct_kset* queues_kset +struct_list_head unlink_list +unsigned_int promiscuity +unsigned_int allmulti +bool uc_promisc +unsigned_char nested_level +struct_in_device* ip_ptr read_mostly read_mostly __in_dev_get +struct_inet6_dev* ip6_ptr read_mostly read_mostly __in6_dev_get +struct_vlan_info* vlan_info +struct_dsa_port* dsa_ptr +struct_tipc_bearer* tipc_ptr +void* atalk_ptr +void* ax25_ptr +struct_wireless_dev* ieee80211_ptr +struct_wpan_dev* ieee802154_ptr +struct_mpls_dev* mpls_ptr +struct_mctp_dev* mctp_ptr +unsigned_char* dev_addr +struct_netdev_queue* _rx read_mostly - netdev_get_rx_queue(rx) +unsigned_int num_rx_queues +unsigned_int real_num_rx_queues - read_mostly get_rps_cpu +struct_bpf_prog* xdp_prog +unsigned_long gro_flush_timeout - read_mostly napi_complete_done +int napi_defer_hard_irqs - read_mostly napi_complete_done +unsigned_int gro_max_size - read_mostly skb_gro_receive +unsigned_int gro_ipv4_max_size - read_mostly skb_gro_receive +rx_handler_func_t* rx_handler read_mostly - __netif_receive_skb_core +void* rx_handler_data read_mostly - +struct_netdev_queue* ingress_queue read_mostly - +struct_bpf_mprog_entry tcx_ingress - read_mostly sch_handle_ingress +struct_nf_hook_entries* nf_hooks_ingress +unsigned_char broadcast[32] +struct_cpu_rmap* rx_cpu_rmap +struct_hlist_node index_hlist +struct_netdev_queue* _tx read_mostly - netdev_get_tx_queue(tx) +unsigned_int num_tx_queues - - +unsigned_int real_num_tx_queues read_mostly - skb_tx_hash,netdev_core_pick_tx(tx) +unsigned_int tx_queue_len +spinlock_t tx_global_lock +struct_xdp_dev_bulk_queue__percpu* xdp_bulkq +struct_xps_dev_maps* xps_maps[2] read_mostly - __netif_set_xps_queue +struct_bpf_mprog_entry tcx_egress read_mostly - sch_handle_egress +struct_nf_hook_entries* nf_hooks_egress read_mostly - +struct_hlist_head qdisc_hash[16] +struct_timer_list watchdog_timer +int watchdog_timeo +u32 proto_down_reason +struct_list_head todo_list +int__percpu* pcpu_refcnt +refcount_t dev_refcnt +struct_ref_tracker_dir refcnt_tracker +struct_list_head link_watch_list +enum:8 reg_state +bool dismantle +enum:16 rtnl_link_state +bool needs_free_netdev +void*priv_destructor struct_net_device +struct_netpoll_info* npinfo - read_mostly napi_poll/napi_poll_lock +possible_net_t nd_net - read_mostly (dev_net)napi_busy_loop,tcp_v(4/6)_rcv,ip(v6)_rcv,ip(6)_input,ip(6)_input_finish +void* ml_priv +enum_netdev_ml_priv_type ml_priv_type +struct_pcpu_lstats__percpu* lstats +struct_pcpu_sw_netstats__percpu* tstats +struct_pcpu_dstats__percpu* dstats +struct_garp_port* garp_port +struct_mrp_port* mrp_port +struct_dm_hw_stat_delta* dm_private +struct_device dev - - +struct_attribute_group* sysfs_groups[4] +struct_attribute_group* sysfs_rx_queue_group +struct_rtnl_link_ops* rtnl_link_ops +unsigned_int gso_max_size read_mostly - sk_dst_gso_max_size +unsigned_int tso_max_size +u16 gso_max_segs read_mostly - gso_max_segs +u16 tso_max_segs +unsigned_int gso_ipv4_max_size read_mostly - sk_dst_gso_max_size +struct_dcbnl_rtnl_ops* dcbnl_ops +s16 num_tc read_mostly - skb_tx_hash +struct_netdev_tc_txq tc_to_txq[16] read_mostly - skb_tx_hash +u8 prio_tc_map[16] +unsigned_int fcoe_ddp_xid +struct_netprio_map* priomap +struct_phy_device* phydev +struct_sfp_bus* sfp_bus +struct_lock_class_key* qdisc_tx_busylock +bool proto_down +unsigned:1 wol_enabled +unsigned:1 threaded - - napi_poll(napi_enable,dev_set_threaded) +struct_list_head net_notifier_list +struct_macsec_ops* macsec_ops +struct_udp_tunnel_nic_info* udp_tunnel_nic_info +struct_udp_tunnel_nic* udp_tunnel_nic +unsigned_int xdp_zc_max_segs +struct_bpf_xdp_entity xdp_state[3] +u8 dev_addr_shadow[32] +netdevice_tracker linkwatch_dev_tracker +netdevice_tracker watchdog_dev_tracker +netdevice_tracker dev_registered_tracker +struct_rtnl_hw_stats64* offload_xstats_l3 +struct_devlink_port* devlink_port +struct_dpll_pin* dpll_pin diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst new file mode 100644 index 000000000000..47e1a0105384 --- /dev/null +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -0,0 +1,157 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=========================================== +netns_ipv4 struct fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..struct ..netns_ipv4 +struct_inet_timewait_death_row tcp_death_row +struct_udp_table* udp_table +struct_ctl_table_header* forw_hdr +struct_ctl_table_header* frags_hdr +struct_ctl_table_header* ipv4_hdr +struct_ctl_table_header* route_hdr +struct_ctl_table_header* xfrm4_hdr +struct_ipv4_devconf* devconf_all +struct_ipv4_devconf* devconf_dflt +struct_ip_ra_chain ra_chain +struct_mutex ra_mutex +struct_fib_rules_ops* rules_ops +struct_fib_table fib_main +struct_fib_table fib_default +unsigned_int fib_rules_require_fldissect +bool fib_has_custom_rules +bool fib_has_custom_local_routes +bool fib_offload_disabled +atomic_t fib_num_tclassid_users +struct_hlist_head* fib_table_hash +struct_sock* fibnl +struct_sock* mc_autojoin_sk +struct_inet_peer_base* peers +struct_fqdir* fqdir +u8 sysctl_icmp_echo_ignore_all +u8 sysctl_icmp_echo_enable_probe +u8 sysctl_icmp_echo_ignore_broadcasts +u8 sysctl_icmp_ignore_bogus_error_responses +u8 sysctl_icmp_errors_use_inbound_ifaddr +int sysctl_icmp_ratelimit +int sysctl_icmp_ratemask +u32 ip_rt_min_pmtu - - +int ip_rt_mtu_expires - - +int ip_rt_min_advmss - - +struct_local_ports ip_local_ports - - +u8 sysctl_tcp_ecn - - +u8 sysctl_tcp_ecn_fallback - - +u8 sysctl_ip_default_ttl - - ip4_dst_hoplimit/ip_select_ttl +u8 sysctl_ip_no_pmtu_disc - - +u8 sysctl_ip_fwd_use_pmtu read_mostly - ip_dst_mtu_maybe_forward/ip_skb_dst_mtu +u8 sysctl_ip_fwd_update_priority - - ip_forward +u8 sysctl_ip_nonlocal_bind - - +u8 sysctl_ip_autobind_reuse - - +u8 sysctl_ip_dynaddr - - +u8 sysctl_ip_early_demux - read_mostly ip(6)_rcv_finish_core +u8 sysctl_raw_l3mdev_accept - - +u8 sysctl_tcp_early_demux - read_mostly ip(6)_rcv_finish_core +u8 sysctl_udp_early_demux +u8 sysctl_nexthop_compat_mode - - +u8 sysctl_fwmark_reflect - - +u8 sysctl_tcp_fwmark_accept - - +u8 sysctl_tcp_l3mdev_accept - - +u8 sysctl_tcp_mtu_probing - - +int sysctl_tcp_mtu_probe_floor - - +int sysctl_tcp_base_mss - - +int sysctl_tcp_min_snd_mss read_mostly - __tcp_mtu_to_mss(tcp_write_xmit) +int sysctl_tcp_probe_threshold - - tcp_mtu_probe(tcp_write_xmit) +u32 sysctl_tcp_probe_interval - - tcp_mtu_check_reprobe(tcp_write_xmit) +int sysctl_tcp_keepalive_time - - +int sysctl_tcp_keepalive_intvl - - +u8 sysctl_tcp_keepalive_probes - - +u8 sysctl_tcp_syn_retries - - +u8 sysctl_tcp_synack_retries - - +u8 sysctl_tcp_syncookies - - generated_on_syn +u8 sysctl_tcp_migrate_req - - reuseport +u8 sysctl_tcp_comp_sack_nr - - __tcp_ack_snd_check +int sysctl_tcp_reordering - read_mostly tcp_may_raise_cwnd/tcp_cong_control +u8 sysctl_tcp_retries1 - - +u8 sysctl_tcp_retries2 - - +u8 sysctl_tcp_orphan_retries - - +u8 sysctl_tcp_tw_reuse - - timewait_sock_ops +int sysctl_tcp_fin_timeout - - TCP_LAST_ACK/tcp_rcv_state_process +unsigned_int sysctl_tcp_notsent_lowat read_mostly - tcp_notsent_lowat/tcp_stream_memory_free +u8 sysctl_tcp_sack - - tcp_syn_options +u8 sysctl_tcp_window_scaling - - tcp_syn_options,tcp_parse_options +u8 sysctl_tcp_timestamps +u8 sysctl_tcp_early_retrans read_mostly - tcp_schedule_loss_probe(tcp_write_xmit) +u8 sysctl_tcp_recovery - - tcp_fastretrans_alert +u8 sysctl_tcp_thin_linear_timeouts - - tcp_retrans_timer(on_thin_streams) +u8 sysctl_tcp_slow_start_after_idle - - unlikely(tcp_cwnd_validate-network-not-starved) +u8 sysctl_tcp_retrans_collapse - - +u8 sysctl_tcp_stdurg - - unlikely(tcp_check_urg) +u8 sysctl_tcp_rfc1337 - - +u8 sysctl_tcp_abort_on_overflow - - +u8 sysctl_tcp_fack - - +int sysctl_tcp_max_reordering - - tcp_check_sack_reordering +int sysctl_tcp_adv_win_scale - - tcp_init_buffer_space +u8 sysctl_tcp_dsack - - partial_packet_or_retrans_in_tcp_data_queue +u8 sysctl_tcp_app_win - - tcp_win_from_space +u8 sysctl_tcp_frto - - tcp_enter_loss +u8 sysctl_tcp_nometrics_save - - TCP_LAST_ACK/tcp_update_metrics +u8 sysctl_tcp_no_ssthresh_metrics_save - - TCP_LAST_ACK/tcp_(update/init)_metrics +u8 sysctl_tcp_moderate_rcvbuf read_mostly read_mostly tcp_tso_should_defer(tx);tcp_rcv_space_adjust(rx) +u8 sysctl_tcp_tso_win_divisor read_mostly - tcp_tso_should_defer(tcp_write_xmit) +u8 sysctl_tcp_workaround_signed_windows - - tcp_select_window +int sysctl_tcp_limit_output_bytes read_mostly - tcp_small_queue_check(tcp_write_xmit) +int sysctl_tcp_challenge_ack_limit - - +int sysctl_tcp_min_rtt_wlen read_mostly - tcp_ack_update_rtt +u8 sysctl_tcp_min_tso_segs - - unlikely(icsk_ca_ops-written) +u8 sysctl_tcp_tso_rtt_log read_mostly - tcp_tso_autosize +u8 sysctl_tcp_autocorking read_mostly - tcp_push/tcp_should_autocork +u8 sysctl_tcp_reflect_tos - - tcp_v(4/6)_send_synack +int sysctl_tcp_invalid_ratelimit - - +int sysctl_tcp_pacing_ss_ratio - - default_cong_cont(tcp_update_pacing_rate) +int sysctl_tcp_pacing_ca_ratio - - default_cong_cont(tcp_update_pacing_rate) +int sysctl_tcp_wmem[3] read_mostly - tcp_wmem_schedule(sendmsg/sendpage) +int sysctl_tcp_rmem[3] - read_mostly __tcp_grow_window(tx),tcp_rcv_space_adjust(rx) +unsigned_int sysctl_tcp_child_ehash_entries +unsigned_long sysctl_tcp_comp_sack_delay_ns - - __tcp_ack_snd_check +unsigned_long sysctl_tcp_comp_sack_slack_ns - - __tcp_ack_snd_check +int sysctl_max_syn_backlog - - +int sysctl_tcp_fastopen - - +struct_tcp_congestion_ops tcp_congestion_control - - init_cc +struct_tcp_fastopen_context tcp_fastopen_ctx - - +unsigned_int sysctl_tcp_fastopen_blackhole_timeout - - +atomic_t tfo_active_disable_times - - +unsigned_long tfo_active_disable_stamp - - +u32 tcp_challenge_timestamp - - +u32 tcp_challenge_count - - +u8 sysctl_tcp_plb_enabled - - +u8 sysctl_tcp_plb_idle_rehash_rounds - - +u8 sysctl_tcp_plb_rehash_rounds - - +u8 sysctl_tcp_plb_suspend_rto_sec - - +int sysctl_tcp_plb_cong_thresh - - +int sysctl_udp_wmem_min +int sysctl_udp_rmem_min +u8 sysctl_fib_notify_on_flag_change +u8 sysctl_udp_l3mdev_accept +u8 sysctl_igmp_llm_reports +int sysctl_igmp_max_memberships +int sysctl_igmp_max_msf +int sysctl_igmp_qrv +struct_ping_group_range ping_group_range +atomic_t dev_addr_genid +unsigned_int sysctl_udp_child_hash_entries +unsigned_long* sysctl_local_reserved_ports +int sysctl_ip_prot_sock +struct_mr_table* mrt +struct_list_head mr_tables +struct_fib_rules_ops* mr_rules_ops +u32 sysctl_fib_multipath_hash_fields +u8 sysctl_fib_multipath_use_neigh +u8 sysctl_fib_multipath_hash_policy +struct_fib_notifier_ops* notifier_ops +unsigned_int fib_seq +struct_fib_notifier_ops* ipmr_notifier_ops +unsigned_int ipmr_seq +atomic_t rt_genid +siphash_key_t ip_id_key diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst new file mode 100644 index 000000000000..1eaeb8bc252c --- /dev/null +++ b/Documentation/networking/net_cachelines/snmp.rst @@ -0,0 +1,134 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +=========================================== +netns_ipv4 enum fast path usage breakdown +=========================================== + +Type Name fastpath_tx_access fastpath_rx_access comment +..enum +unsigned_long LINUX_MIB_TCPKEEPALIVE write_mostly - tcp_keepalive_timer +unsigned_long LINUX_MIB_DELAYEDACKS write_mostly - tcp_delack_timer_handler,tcp_delack_timer +unsigned_long LINUX_MIB_DELAYEDACKLOCKED write_mostly - tcp_delack_timer_handler,tcp_delack_timer +unsigned_long LINUX_MIB_TCPAUTOCORKING write_mostly - tcp_push,tcp_sendmsg_locked +unsigned_long LINUX_MIB_TCPFROMZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPTOZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPWANTZEROWINDOWADV write_mostly - tcp_select_window,tcp_transmit-skb +unsigned_long LINUX_MIB_TCPORIGDATASENT write_mostly - tcp_write_xmit +unsigned_long LINUX_MIB_TCPHPHITS - write_mostly tcp_rcv_established,tcp_v4_do_rcv,tcp_v6_do_rcv +unsigned_long LINUX_MIB_TCPRCVCOALESCE - write_mostly tcp_try_coalesce,tcp_queue_rcv,tcp_rcv_established +unsigned_long LINUX_MIB_TCPPUREACKS - write_mostly tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_TCPHPACKS - write_mostly tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_TCPDELIVERED - write_mostly tcp_newly_delivered,tcp_ack,tcp_rcv_established +unsigned_long LINUX_MIB_SYNCOOKIESSENT +unsigned_long LINUX_MIB_SYNCOOKIESRECV +unsigned_long LINUX_MIB_SYNCOOKIESFAILED +unsigned_long LINUX_MIB_EMBRYONICRSTS +unsigned_long LINUX_MIB_PRUNECALLED +unsigned_long LINUX_MIB_RCVPRUNED +unsigned_long LINUX_MIB_OFOPRUNED +unsigned_long LINUX_MIB_OUTOFWINDOWICMPS +unsigned_long LINUX_MIB_LOCKDROPPEDICMPS +unsigned_long LINUX_MIB_ARPFILTER +unsigned_long LINUX_MIB_TIMEWAITED +unsigned_long LINUX_MIB_TIMEWAITRECYCLED +unsigned_long LINUX_MIB_TIMEWAITKILLED +unsigned_long LINUX_MIB_PAWSACTIVEREJECTED +unsigned_long LINUX_MIB_PAWSESTABREJECTED +unsigned_long LINUX_MIB_DELAYEDACKLOST +unsigned_long LINUX_MIB_LISTENOVERFLOWS +unsigned_long LINUX_MIB_LISTENDROPS +unsigned_long LINUX_MIB_TCPRENORECOVERY +unsigned_long LINUX_MIB_TCPSACKRECOVERY +unsigned_long LINUX_MIB_TCPSACKRENEGING +unsigned_long LINUX_MIB_TCPSACKREORDER +unsigned_long LINUX_MIB_TCPRENOREORDER +unsigned_long LINUX_MIB_TCPTSREORDER +unsigned_long LINUX_MIB_TCPFULLUNDO +unsigned_long LINUX_MIB_TCPPARTIALUNDO +unsigned_long LINUX_MIB_TCPDSACKUNDO +unsigned_long LINUX_MIB_TCPLOSSUNDO +unsigned_long LINUX_MIB_TCPLOSTRETRANSMIT +unsigned_long LINUX_MIB_TCPRENOFAILURES +unsigned_long LINUX_MIB_TCPSACKFAILURES +unsigned_long LINUX_MIB_TCPLOSSFAILURES +unsigned_long LINUX_MIB_TCPFASTRETRANS +unsigned_long LINUX_MIB_TCPSLOWSTARTRETRANS +unsigned_long LINUX_MIB_TCPTIMEOUTS +unsigned_long LINUX_MIB_TCPLOSSPROBES +unsigned_long LINUX_MIB_TCPLOSSPROBERECOVERY +unsigned_long LINUX_MIB_TCPRENORECOVERYFAIL +unsigned_long LINUX_MIB_TCPSACKRECOVERYFAIL +unsigned_long LINUX_MIB_TCPRCVCOLLAPSED +unsigned_long LINUX_MIB_TCPDSACKOLDSENT +unsigned_long LINUX_MIB_TCPDSACKOFOSENT +unsigned_long LINUX_MIB_TCPDSACKRECV +unsigned_long LINUX_MIB_TCPDSACKOFORECV +unsigned_long LINUX_MIB_TCPABORTONDATA +unsigned_long LINUX_MIB_TCPABORTONCLOSE +unsigned_long LINUX_MIB_TCPABORTONMEMORY +unsigned_long LINUX_MIB_TCPABORTONTIMEOUT +unsigned_long LINUX_MIB_TCPABORTONLINGER +unsigned_long LINUX_MIB_TCPABORTFAILED +unsigned_long LINUX_MIB_TCPMEMORYPRESSURES +unsigned_long LINUX_MIB_TCPMEMORYPRESSURESCHRONO +unsigned_long LINUX_MIB_TCPSACKDISCARD +unsigned_long LINUX_MIB_TCPDSACKIGNOREDOLD +unsigned_long LINUX_MIB_TCPDSACKIGNOREDNOUNDO +unsigned_long LINUX_MIB_TCPSPURIOUSRTOS +unsigned_long LINUX_MIB_TCPMD5NOTFOUND +unsigned_long LINUX_MIB_TCPMD5UNEXPECTED +unsigned_long LINUX_MIB_TCPMD5FAILURE +unsigned_long LINUX_MIB_SACKSHIFTED +unsigned_long LINUX_MIB_SACKMERGED +unsigned_long LINUX_MIB_SACKSHIFTFALLBACK +unsigned_long LINUX_MIB_TCPBACKLOGDROP +unsigned_long LINUX_MIB_PFMEMALLOCDROP +unsigned_long LINUX_MIB_TCPMINTTLDROP +unsigned_long LINUX_MIB_TCPDEFERACCEPTDROP +unsigned_long LINUX_MIB_IPRPFILTER +unsigned_long LINUX_MIB_TCPTIMEWAITOVERFLOW +unsigned_long LINUX_MIB_TCPREQQFULLDOCOOKIES +unsigned_long LINUX_MIB_TCPREQQFULLDROP +unsigned_long LINUX_MIB_TCPRETRANSFAIL +unsigned_long LINUX_MIB_TCPBACKLOGCOALESCE +unsigned_long LINUX_MIB_TCPOFOQUEUE +unsigned_long LINUX_MIB_TCPOFODROP +unsigned_long LINUX_MIB_TCPOFOMERGE +unsigned_long LINUX_MIB_TCPCHALLENGEACK +unsigned_long LINUX_MIB_TCPSYNCHALLENGE +unsigned_long LINUX_MIB_TCPFASTOPENACTIVE +unsigned_long LINUX_MIB_TCPFASTOPENACTIVEFAIL +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVE +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVEFAIL +unsigned_long LINUX_MIB_TCPFASTOPENLISTENOVERFLOW +unsigned_long LINUX_MIB_TCPFASTOPENCOOKIEREQD +unsigned_long LINUX_MIB_TCPFASTOPENBLACKHOLE +unsigned_long LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES +unsigned_long LINUX_MIB_BUSYPOLLRXPACKETS +unsigned_long LINUX_MIB_TCPSYNRETRANS +unsigned_long LINUX_MIB_TCPHYSTARTTRAINDETECT +unsigned_long LINUX_MIB_TCPHYSTARTTRAINCWND +unsigned_long LINUX_MIB_TCPHYSTARTDELAYDETECT +unsigned_long LINUX_MIB_TCPHYSTARTDELAYCWND +unsigned_long LINUX_MIB_TCPACKSKIPPEDSYNRECV +unsigned_long LINUX_MIB_TCPACKSKIPPEDPAWS +unsigned_long LINUX_MIB_TCPACKSKIPPEDSEQ +unsigned_long LINUX_MIB_TCPACKSKIPPEDFINWAIT2 +unsigned_long LINUX_MIB_TCPACKSKIPPEDTIMEWAIT +unsigned_long LINUX_MIB_TCPACKSKIPPEDCHALLENGE +unsigned_long LINUX_MIB_TCPWINPROBE +unsigned_long LINUX_MIB_TCPMTUPFAIL +unsigned_long LINUX_MIB_TCPMTUPSUCCESS +unsigned_long LINUX_MIB_TCPDELIVEREDCE +unsigned_long LINUX_MIB_TCPACKCOMPRESSED +unsigned_long LINUX_MIB_TCPZEROWINDOWDROP +unsigned_long LINUX_MIB_TCPRCVQDROP +unsigned_long LINUX_MIB_TCPWQUEUETOOBIG +unsigned_long LINUX_MIB_TCPFASTOPENPASSIVEALTKEY +unsigned_long LINUX_MIB_TCPTIMEOUTREHASH +unsigned_long LINUX_MIB_TCPDUPLICATEDATAREHASH +unsigned_long LINUX_MIB_TCPDSACKRECVSEGS +unsigned_long LINUX_MIB_TCPDSACKIGNOREDDUBIOUS +unsigned_long LINUX_MIB_TCPMIGRATEREQSUCCESS +unsigned_long LINUX_MIB_TCPMIGRATEREQFAILURE +unsigned_long __LINUX_MIB_MAX diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst new file mode 100644 index 000000000000..802ad22015d7 --- /dev/null +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -0,0 +1,156 @@ +.. SPDX-License-Identifier: GPL-2.0 +.. Copyright (C) 2023 Google LLC +========================================= +tcp_sock struct fast path usage breakdown +========================================= + +Type Name fastpath_tx_access fastpath_rx_access Comments +..struct ..tcp_sock +struct_inet_connection_sock inet_conn +u16 tcp_header_len read_mostly read_mostly tcp_bound_to_half_wnd,tcp_current_mss(tx);tcp_rcv_established(rx) +u16 gso_segs read_mostly - tcp_xmit_size_goal +__be32 pred_flags read_write read_mostly tcp_select_window(tx);tcp_rcv_established(rx) +u64 bytes_received - read_write tcp_rcv_nxt_update(rx) +u32 segs_in - read_write tcp_v6_rcv(rx) +u32 data_segs_in - read_write tcp_v6_rcv(rx) +u32 rcv_nxt read_mostly read_write tcp_cleanup_rbuf,tcp_send_ack,tcp_inq_hint,tcp_transmit_skb,tcp_receive_window(tx);tcp_v6_do_rcv,tcp_rcv_established,tcp_data_queue,tcp_receive_window,tcp_rcv_nxt_update(write)(rx) +u32 copied_seq - read_mostly tcp_cleanup_rbuf,tcp_rcv_space_adjust,tcp_inq_hint +u32 rcv_wup - read_write __tcp_cleanup_rbuf,tcp_receive_window,tcp_receive_established +u32 snd_nxt read_write read_mostly tcp_rate_check_app_limited,__tcp_transmit_skb,tcp_event_new_data_sent(write)(tx);tcp_rcv_established,tcp_ack,tcp_clean_rtx_queue(rx) +u32 segs_out read_write - __tcp_transmit_skb +u32 data_segs_out read_write - __tcp_transmit_skb,tcp_update_skb_after_send +u64 bytes_sent read_write - __tcp_transmit_skb +u64 bytes_acked - read_write tcp_snd_una_update/tcp_ack +u32 dsack_dups +u32 snd_una read_mostly read_write tcp_wnd_end,tcp_urg_mode,tcp_minshall_check,tcp_cwnd_validate(tx);tcp_ack,tcp_may_update_window,tcp_clean_rtx_queue(write),tcp_ack_tstamp(rx) +u32 snd_sml read_write - tcp_minshall_check,tcp_minshall_update +u32 rcv_tstamp - read_mostly tcp_ack +u32 lsndtime read_write - tcp_slow_start_after_idle_check,tcp_event_data_sent +u32 last_oow_ack_time +u32 compressed_ack_rcv_nxt +u32 tsoffset read_mostly read_mostly tcp_established_options(tx);tcp_fast_parse_options(rx) +struct_list_head tsq_node - - +struct_list_head tsorted_sent_queue read_write - tcp_update_skb_after_send +u32 snd_wl1 - read_mostly tcp_may_update_window +u32 snd_wnd read_mostly read_mostly tcp_wnd_end,tcp_tso_should_defer(tx);tcp_fast_path_on(rx) +u32 max_window read_mostly - tcp_bound_to_half_wnd,forced_push +u32 mss_cache read_mostly read_mostly tcp_rate_check_app_limited,tcp_current_mss,tcp_sync_mss,tcp_sndbuf_expand,tcp_tso_should_defer(tx);tcp_update_pacing_rate,tcp_clean_rtx_queue(rx) +u32 window_clamp read_mostly read_write tcp_rcv_space_adjust,__tcp_select_window +u32 rcv_ssthresh read_mostly - __tcp_select_window +u82 scaling_ratio +struct tcp_rack +u16 advmss - read_mostly tcp_rcv_space_adjust +u8 compressed_ack +u8:2 dup_ack_counter +u8:1 tlp_retrans +u8:1 tcp_usec_ts +u32 chrono_start read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u32[3] chrono_stat read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u8:2 chrono_type read_write - tcp_chrono_start/stop(tcp_write_xmit,tcp_cwnd_validate,tcp_send_syn_data) +u8:1 rate_app_limited - read_write tcp_rate_gen +u8:1 fastopen_connect +u8:1 fastopen_no_cookie +u8:1 is_sack_reneg - read_mostly tcp_skb_entail,tcp_ack +u8:2 fastopen_client_fail +u8:4 nonagle read_write - tcp_skb_entail,tcp_push_pending_frames +u8:1 thin_lto +u8:1 recvmsg_inq +u8:1 repair read_mostly - tcp_write_xmit +u8:1 frto +u8 repair_queue - - +u8:2 save_syn +u8:1 syn_data +u8:1 syn_fastopen +u8:1 syn_fastopen_exp +u8:1 syn_fastopen_ch +u8:1 syn_data_acked +u8:1 is_cwnd_limited read_mostly - tcp_cwnd_validate,tcp_is_cwnd_limited +u32 tlp_high_seq - read_mostly tcp_ack +u32 tcp_tx_delay +u64 tcp_wstamp_ns read_write - tcp_pacing_check,tcp_tso_should_defer,tcp_update_skb_after_send +u64 tcp_clock_cache read_write read_write tcp_mstamp_refresh(tcp_write_xmit/tcp_rcv_space_adjust),__tcp_transmit_skb,tcp_tso_should_defer;timer +u64 tcp_mstamp read_write read_write tcp_mstamp_refresh(tcp_write_xmit/tcp_rcv_space_adjust)(tx);tcp_rcv_space_adjust,tcp_rate_gen,tcp_clean_rtx_queue,tcp_ack_update_rtt/tcp_time_stamp(rx);timer +u32 srtt_us read_mostly read_write tcp_tso_should_defer(tx);tcp_update_pacing_rate,__tcp_set_rto,tcp_rtt_estimator(rx) +u32 mdev_us read_write - tcp_rtt_estimator +u32 mdev_max_us +u32 rttvar_us - read_mostly __tcp_set_rto +u32 rtt_seq read_write tcp_rtt_estimator +struct_minmax rtt_min - read_mostly tcp_min_rtt/tcp_rate_gen,tcp_min_rtttcp_update_rtt_min +u32 packets_out read_write read_write tcp_packets_in_flight(tx/rx);tcp_slow_start_after_idle_check,tcp_nagle_check,tcp_rate_skb_sent,tcp_event_new_data_sent,tcp_cwnd_validate,tcp_write_xmit(tx);tcp_ack,tcp_clean_rtx_queue,tcp_update_pacing_rate(rx) +u32 retrans_out - read_mostly tcp_packets_in_flight,tcp_rate_check_app_limited +u32 max_packets_out - read_write tcp_cwnd_validate +u32 cwnd_usage_seq - read_write tcp_cwnd_validate +u16 urg_data - read_mostly tcp_fast_path_check +u8 ecn_flags read_write - tcp_ecn_send +u8 keepalive_probes +u32 reordering read_mostly - tcp_sndbuf_expand +u32 reord_seen +u32 snd_up read_write read_mostly tcp_mark_urg,tcp_urg_mode,__tcp_transmit_skb(tx);tcp_clean_rtx_queue(rx) +struct_tcp_options_received rx_opt read_mostly read_write tcp_established_options(tx);tcp_fast_path_on,tcp_ack_update_window,tcp_is_sack,tcp_data_queue,tcp_rcv_established,tcp_ack_update_rtt(rx) +u32 snd_ssthresh - read_mostly tcp_update_pacing_rate +u32 snd_cwnd read_mostly read_mostly tcp_snd_cwnd,tcp_rate_check_app_limited,tcp_tso_should_defer(tx);tcp_update_pacing_rate +u32 snd_cwnd_cnt +u32 snd_cwnd_clamp +u32 snd_cwnd_used +u32 snd_cwnd_stamp +u32 prior_cwnd +u32 prr_delivered +u32 prr_out read_mostly read_mostly tcp_rate_skb_sent,tcp_newly_delivered(tx);tcp_ack,tcp_rate_gen,tcp_clean_rtx_queue(rx) +u32 delivered read_mostly read_write tcp_rate_skb_sent, tcp_newly_delivered(tx);tcp_ack, tcp_rate_gen, tcp_clean_rtx_queue (rx) +u32 delivered_ce read_mostly read_write tcp_rate_skb_sent(tx);tcp_rate_gen(rx) +u32 lost - read_mostly tcp_ack +u32 app_limited read_write read_mostly tcp_rate_check_app_limited,tcp_rate_skb_sent(tx);tcp_rate_gen(rx) +u64 first_tx_mstamp read_write - tcp_rate_skb_sent +u64 delivered_mstamp read_write - tcp_rate_skb_sent +u32 rate_delivered - read_mostly tcp_rate_gen +u32 rate_interval_us - read_mostly rate_delivered,rate_app_limited +u32 rcv_wnd read_write read_mostly tcp_select_window,tcp_receive_window,tcp_fast_path_check +u32 write_seq read_write - tcp_rate_check_app_limited,tcp_write_queue_empty,tcp_skb_entail,forced_push,tcp_mark_push +u32 notsent_lowat read_mostly - tcp_stream_memory_free +u32 pushed_seq read_write - tcp_mark_push,forced_push +u32 lost_out read_mostly read_mostly tcp_left_out(tx);tcp_packets_in_flight(tx/rx);tcp_rate_check_app_limited(rx) +u32 sacked_out read_mostly read_mostly tcp_left_out(tx);tcp_packets_in_flight(tx/rx);tcp_clean_rtx_queue(rx) +struct_hrtimer pacing_timer +struct_hrtimer compressed_ack_timer +struct_sk_buff* lost_skb_hint read_mostly tcp_clean_rtx_queue +struct_sk_buff* retransmit_skb_hint read_mostly - tcp_clean_rtx_queue +struct_rb_root out_of_order_queue - read_mostly tcp_data_queue,tcp_fast_path_check +struct_sk_buff* ooo_last_skb +struct_tcp_sack_block[1] duplicate_sack +struct_tcp_sack_block[4] selective_acks +struct_tcp_sack_block[4] recv_sack_cache +struct_sk_buff* highest_sack read_write - tcp_event_new_data_sent +int lost_cnt_hint +u32 prior_ssthresh +u32 high_seq +u32 retrans_stamp +u32 undo_marker +int undo_retrans +u64 bytes_retrans +u32 total_retrans +u32 rto_stamp +u16 total_rto +u16 total_rto_recoveries +u32 total_rto_time +u32 urg_seq - - +unsigned_int keepalive_time +unsigned_int keepalive_intvl +int linger2 +u8 bpf_sock_ops_cb_flags +u8:1 bpf_chg_cc_inprogress +u16 timeout_rehash +u32 rcv_ooopack +u32 rcv_rtt_last_tsecr +struct rcv_rtt_est - read_write tcp_rcv_space_adjust,tcp_rcv_established +struct rcvq_space - read_write tcp_rcv_space_adjust +struct mtu_probe +u32 plb_rehash +u32 mtu_info +bool is_mptcp +bool smc_hs_congested +bool syn_smc +struct_tcp_sock_af_ops* af_specific +struct_tcp_md5sig_info* md5sig_info +struct_tcp_fastopen_request* fastopen_req +struct_request_sock* fastopen_rsk +struct_saved_syn* saved_syn \ No newline at end of file diff --git a/MAINTAINERS b/MAINTAINERS index e3fd148d462e..2d2865c1e479 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14986,6 +14986,7 @@ Q: https://patchwork.kernel.org/project/netdevbpf/list/ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/devicetree/bindings/net/ +F: Documentation/networking/net_cachelines/net_device.rst F: drivers/connector/ F: drivers/net/ F: include/dt-bindings/net/ @@ -15041,6 +15042,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git F: Documentation/core-api/netlink.rst F: Documentation/netlink/ F: Documentation/networking/ +F: Documentation/networking/net_cachelines/ F: Documentation/process/maintainer-netdev.rst F: Documentation/userspace-api/netlink/ F: include/linux/in.h @@ -15149,6 +15151,7 @@ NETWORKING [TCP] M: Eric Dumazet L: netdev@vger.kernel.org S: Maintained +F: Documentation/networking/net_cachelines/tcp_sock.rst F: include/linux/tcp.h F: include/net/tcp.h F: include/trace/events/tcp.h -- cgit v1.3.1 From 8ad55b1e73c4e77656e2bea68fa2b484f33a6425 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 1 Dec 2023 19:01:54 +0100 Subject: docs: netlink: add NLMSG_DONE message format for doit actions In case NLMSG_DONE message is sent as a reply to doit action, multiple kernel implementation do not send anything else than struct nlmsghdr. Add this note to the Netlink intro documentation. Signed-off-by: Jiri Pirko Reviewed-by: Bagas Sanjaya Link: https://lore.kernel.org/r/20231201180154.864007-1-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/userspace-api/netlink/intro.rst | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'Documentation') diff --git a/Documentation/userspace-api/netlink/intro.rst b/Documentation/userspace-api/netlink/intro.rst index 7b1d401210ef..aacffade8f84 100644 --- a/Documentation/userspace-api/netlink/intro.rst +++ b/Documentation/userspace-api/netlink/intro.rst @@ -234,6 +234,10 @@ ACK attributes may be present:: | ** optionally extended ACK | ---------------------------------------------- +Note that some implementations may issue custom ``NLMSG_DONE`` messages +in reply to ``do`` action requests. In that case the payload is +implementation-specific and may also be absent. + .. _res_fam: Resolving the Family ID -- cgit v1.3.1 From bc877956272f0521fef107838555817112a450dc Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:29 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for queue Add support in netlink spec(netdev.yaml) for queue information. Add code generated from the spec. Note: The "queue-type" attribute takes values 0 and 1 for rx and tx queue type respectively. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147330963.5260.2576294626647300472.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 52 +++++++++++ include/uapi/linux/netdev.h | 16 ++++ net/core/netdev-genl-gen.c | 26 ++++++ net/core/netdev-genl-gen.h | 3 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 16 ++++ tools/net/ynl/generated/netdev-user.c | 153 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 99 +++++++++++++++++++++ 8 files changed, 375 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..719e6aafbfdb 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -66,6 +66,10 @@ definitions: name: tx-checksum doc: L3 checksum HW offload is supported by the driver. + - + name: queue-type + type: enum + entries: [ rx, tx ] attribute-sets: - @@ -209,6 +213,31 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: queue + attributes: + - + name: id + doc: Queue index; most queue types are indexed like a C array, with + indexes starting at 0 and ending at queue count - 1. Queue indexes + are scoped to an interface and queue type. + type: u32 + - + name: ifindex + doc: ifindex of the netdevice to which the queue belongs. + type: u32 + checks: + min: 1 + - + name: type + doc: Queue type as rx, tx. Each queue type defines a separate ID space. + type: u32 + enum: queue-type + - + name: napi-id + doc: ID of the NAPI instance which services this queue. + type: u32 + operations: list: - @@ -307,6 +336,29 @@ operations: dump: reply: *pp-stats-reply config-cond: page-pool-stats + - + name: queue-get + doc: Get queue information from the kernel. + Only configured queues will be reported (as opposed to all available + hardware queues). + attribute-set: queue + do: + request: + attributes: + - ifindex + - type + - id + reply: &queue-get-op + attributes: + - id + - type + - napi-id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *queue-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index dccd8c3a141e..b1dcf88c82cf 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -46,6 +46,18 @@ static const struct nla_policy netdev_page_pool_stats_get_nl_policy[NETDEV_A_PAG }; #endif /* CONFIG_PAGE_POOL_STATS */ +/* NETDEV_CMD_QUEUE_GET - do */ +static const struct nla_policy netdev_queue_get_do_nl_policy[NETDEV_A_QUEUE_TYPE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_QUEUE_GET - dump */ +static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IFINDEX + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -88,6 +100,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .flags = GENL_CMD_CAP_DUMP, }, #endif /* CONFIG_PAGE_POOL_STATS */ + { + .cmd = NETDEV_CMD_QUEUE_GET, + .doit = netdev_nl_queue_get_doit, + .policy = netdev_queue_get_do_nl_policy, + .maxattr = NETDEV_A_QUEUE_TYPE, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_QUEUE_GET, + .dumpit = netdev_nl_queue_get_dumpit, + .policy = netdev_queue_get_dump_nl_policy, + .maxattr = NETDEV_A_QUEUE_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 649e4b46eccf..086623c1797a 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -23,6 +23,9 @@ int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 10f2124e9e23..35e2d692f651 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -140,6 +140,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_genl_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..f857960a7f06 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -62,6 +62,11 @@ enum netdev_xsk_flags { NETDEV_XSK_FLAGS_TX_CHECKSUM = 2, }; +enum netdev_queue_type { + NETDEV_QUEUE_TYPE_RX, + NETDEV_QUEUE_TYPE_TX, +}; + enum { NETDEV_A_DEV_IFINDEX = 1, NETDEV_A_DEV_PAD, @@ -104,6 +109,16 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_QUEUE_ID = 1, + NETDEV_A_QUEUE_IFINDEX, + NETDEV_A_QUEUE_TYPE, + NETDEV_A_QUEUE_NAPI_ID, + + __NETDEV_A_QUEUE_MAX, + NETDEV_A_QUEUE_MAX = (__NETDEV_A_QUEUE_MAX - 1) +}; + enum { NETDEV_CMD_DEV_GET = 1, NETDEV_CMD_DEV_ADD_NTF, @@ -114,6 +129,7 @@ enum { NETDEV_CMD_PAGE_POOL_DEL_NTF, NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, + NETDEV_CMD_QUEUE_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..fbf7e24ade91 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -23,6 +23,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_DEL_NTF] = "page-pool-del-ntf", [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", + [NETDEV_CMD_QUEUE_GET] = "queue-get", }; const char *netdev_op_str(int op) @@ -76,6 +77,18 @@ const char *netdev_xsk_flags_str(enum netdev_xsk_flags value) return netdev_xsk_flags_strmap[value]; } +static const char * const netdev_queue_type_strmap[] = { + [0] = "rx", + [1] = "tx", +}; + +const char *netdev_queue_type_str(enum netdev_queue_type value) +{ + if (value < 0 || value >= (int)MNL_ARRAY_SIZE(netdev_queue_type_strmap)) + return NULL; + return netdev_queue_type_strmap[value]; +} + /* Policies */ struct ynl_policy_attr netdev_page_pool_info_policy[NETDEV_A_PAGE_POOL_MAX + 1] = { [NETDEV_A_PAGE_POOL_ID] = { .name = "id", .type = YNL_PT_UINT, }, @@ -135,6 +148,18 @@ struct ynl_policy_nest netdev_page_pool_stats_nest = { .table = netdev_page_pool_stats_policy, }; +struct ynl_policy_attr netdev_queue_policy[NETDEV_A_QUEUE_MAX + 1] = { + [NETDEV_A_QUEUE_ID] = { .name = "id", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_TYPE] = { .name = "type", .type = YNL_PT_U32, }, + [NETDEV_A_QUEUE_NAPI_ID] = { .name = "napi-id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_queue_nest = { + .max_attr = NETDEV_A_QUEUE_MAX, + .table = netdev_queue_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -617,6 +642,134 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +void netdev_queue_get_req_free(struct netdev_queue_get_req *req) +{ + free(req); +} + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_queue_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_queue_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_QUEUE_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_TYPE) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.type = 1; + dst->type = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.napi_id = 1; + dst->napi_id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_QUEUE_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_queue_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + yrs.yarg.rsp_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); + if (req->_present.type) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_TYPE, req->type); + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_queue_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_QUEUE_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_queue_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_QUEUE_GET - dump */ +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp) +{ + struct netdev_queue_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_queue_get_list); + yds.cb = netdev_queue_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_QUEUE_GET; + yds.rsp_policy = &netdev_queue_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_QUEUE_GET, 1); + ys->req_policy = &netdev_queue_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_QUEUE_IFINDEX, req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_queue_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index cc3d80d1cf8c..d7daf6df3df0 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -20,6 +20,7 @@ const char *netdev_op_str(int op); const char *netdev_xdp_act_str(enum netdev_xdp_act value); const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value); const char *netdev_xsk_flags_str(enum netdev_xsk_flags value); +const char *netdev_queue_type_str(enum netdev_queue_type value); /* Common nested types */ struct netdev_page_pool_info { @@ -261,4 +262,102 @@ netdev_page_pool_stats_get_list_free(struct netdev_page_pool_stats_get_list *rsp struct netdev_page_pool_stats_get_list * netdev_page_pool_stats_get_dump(struct ynl_sock *ys); +/* ============== NETDEV_CMD_QUEUE_GET ============== */ +/* NETDEV_CMD_QUEUE_GET - do */ +struct netdev_queue_get_req { + struct { + __u32 ifindex:1; + __u32 type:1; + __u32 id:1; + } _present; + + __u32 ifindex; + enum netdev_queue_type type; + __u32 id; +}; + +static inline struct netdev_queue_get_req *netdev_queue_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req)); +} +void netdev_queue_get_req_free(struct netdev_queue_get_req *req); + +static inline void +netdev_queue_get_req_set_ifindex(struct netdev_queue_get_req *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} +static inline void +netdev_queue_get_req_set_type(struct netdev_queue_get_req *req, + enum netdev_queue_type type) +{ + req->_present.type = 1; + req->type = type; +} +static inline void +netdev_queue_get_req_set_id(struct netdev_queue_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_queue_get_rsp { + struct { + __u32 id:1; + __u32 type:1; + __u32 napi_id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + enum netdev_queue_type type; + __u32 napi_id; + __u32 ifindex; +}; + +void netdev_queue_get_rsp_free(struct netdev_queue_get_rsp *rsp); + +/* + * Get queue information from the kernel. Only configured queues will be reported (as opposed to all available hardware queues). + */ +struct netdev_queue_get_rsp * +netdev_queue_get(struct ynl_sock *ys, struct netdev_queue_get_req *req); + +/* NETDEV_CMD_QUEUE_GET - dump */ +struct netdev_queue_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_queue_get_req_dump * +netdev_queue_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_queue_get_req_dump)); +} +void netdev_queue_get_req_dump_free(struct netdev_queue_get_req_dump *req); + +static inline void +netdev_queue_get_req_dump_set_ifindex(struct netdev_queue_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_queue_get_list { + struct netdev_queue_get_list *next; + struct netdev_queue_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_queue_get_list_free(struct netdev_queue_get_list *rsp); + +struct netdev_queue_get_list * +netdev_queue_get_dump(struct ynl_sock *ys, + struct netdev_queue_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.3.1 From ff9991499fb53575c45eb92cd064bcd7141bb572 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:28:51 -0800 Subject: netdev-genl: spec: Extend netdev netlink spec in YAML for NAPI Add support in netlink spec(netdev.yaml) for napi related information. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147333119.5260.7050639053080529108.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 30 ++++++++ include/uapi/linux/netdev.h | 9 +++ net/core/netdev-genl-gen.c | 24 +++++++ net/core/netdev-genl-gen.h | 2 + net/core/netdev-genl.c | 10 +++ tools/include/uapi/linux/netdev.h | 9 +++ tools/net/ynl/generated/netdev-user.c | 124 ++++++++++++++++++++++++++++++++ tools/net/ynl/generated/netdev-user.h | 75 +++++++++++++++++++ 8 files changed, 283 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 719e6aafbfdb..76d6b2e15b67 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -213,6 +213,19 @@ attribute-sets: name: recycle-released-refcnt type: uint + - + name: napi + attributes: + - + name: ifindex + doc: ifindex of the netdevice to which NAPI instance belongs. + type: u32 + checks: + min: 1 + - + name: id + doc: ID of the NAPI instance. + type: u32 - name: queue attributes: @@ -359,6 +372,23 @@ operations: attributes: - ifindex reply: *queue-get-op + - + name: napi-get + doc: Get information about NAPI instances configured on the system. + attribute-set: napi + do: + request: + attributes: + - id + reply: &napi-get-op + attributes: + - id + - ifindex + dump: + request: + attributes: + - ifindex + reply: *napi-get-op mcast-groups: list: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index b1dcf88c82cf..be7f2ebd61b2 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -58,6 +58,16 @@ static const struct nla_policy netdev_queue_get_dump_nl_policy[NETDEV_A_QUEUE_IF [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; +/* NETDEV_CMD_NAPI_GET - do */ +static const struct nla_policy netdev_napi_get_do_nl_policy[NETDEV_A_NAPI_ID + 1] = { + [NETDEV_A_NAPI_ID] = { .type = NLA_U32, }, +}; + +/* NETDEV_CMD_NAPI_GET - dump */ +static const struct nla_policy netdev_napi_get_dump_nl_policy[NETDEV_A_NAPI_IFINDEX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -114,6 +124,20 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_QUEUE_IFINDEX, .flags = GENL_CMD_CAP_DUMP, }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .doit = netdev_nl_napi_get_doit, + .policy = netdev_napi_get_do_nl_policy, + .maxattr = NETDEV_A_NAPI_ID, + .flags = GENL_CMD_CAP_DO, + }, + { + .cmd = NETDEV_CMD_NAPI_GET, + .dumpit = netdev_nl_napi_get_dumpit, + .policy = netdev_napi_get_dump_nl_policy, + .maxattr = NETDEV_A_NAPI_IFINDEX, + .flags = GENL_CMD_CAP_DUMP, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index 086623c1797a..a47f2bcbe4fa 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -26,6 +26,8 @@ int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb, int netdev_nl_queue_get_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_queue_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c index 3bc1661e6ebf..4c8ea6a4f015 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -155,6 +155,16 @@ int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +int netdev_nl_napi_get_doit(struct sk_buff *skb, struct genl_info *info) +{ + return -EOPNOTSUPP; +} + +int netdev_nl_napi_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index f857960a7f06..e7bdbcb01f22 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -109,6 +109,14 @@ enum { NETDEV_A_PAGE_POOL_STATS_MAX = (__NETDEV_A_PAGE_POOL_STATS_MAX - 1) }; +enum { + NETDEV_A_NAPI_IFINDEX = 1, + NETDEV_A_NAPI_ID, + + __NETDEV_A_NAPI_MAX, + NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) +}; + enum { NETDEV_A_QUEUE_ID = 1, NETDEV_A_QUEUE_IFINDEX, @@ -130,6 +138,7 @@ enum { NETDEV_CMD_PAGE_POOL_CHANGE_NTF, NETDEV_CMD_PAGE_POOL_STATS_GET, NETDEV_CMD_QUEUE_GET, + NETDEV_CMD_NAPI_GET, __NETDEV_CMD_MAX, NETDEV_CMD_MAX = (__NETDEV_CMD_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index fbf7e24ade91..906b61554698 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -24,6 +24,7 @@ static const char * const netdev_op_strmap[] = { [NETDEV_CMD_PAGE_POOL_CHANGE_NTF] = "page-pool-change-ntf", [NETDEV_CMD_PAGE_POOL_STATS_GET] = "page-pool-stats-get", [NETDEV_CMD_QUEUE_GET] = "queue-get", + [NETDEV_CMD_NAPI_GET] = "napi-get", }; const char *netdev_op_str(int op) @@ -160,6 +161,16 @@ struct ynl_policy_nest netdev_queue_nest = { .table = netdev_queue_policy, }; +struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { + [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, +}; + +struct ynl_policy_nest netdev_napi_nest = { + .max_attr = NETDEV_A_NAPI_MAX, + .table = netdev_napi_policy, +}; + /* Common nested types */ void netdev_page_pool_info_free(struct netdev_page_pool_info *obj) { @@ -770,6 +781,119 @@ free_list: return NULL; } +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +void netdev_napi_get_req_free(struct netdev_napi_get_req *req) +{ + free(req); +} + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp) +{ + free(rsp); +} + +int netdev_napi_get_rsp_parse(const struct nlmsghdr *nlh, void *data) +{ + struct ynl_parse_arg *yarg = data; + struct netdev_napi_get_rsp *dst; + const struct nlattr *attr; + + dst = yarg->data; + + mnl_attr_for_each(attr, nlh, sizeof(struct genlmsghdr)) { + unsigned int type = mnl_attr_get_type(attr); + + if (type == NETDEV_A_NAPI_ID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.id = 1; + dst->id = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IFINDEX) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.ifindex = 1; + dst->ifindex = mnl_attr_get_u32(attr); + } + } + + return MNL_CB_OK; +} + +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req) +{ + struct ynl_req_state yrs = { .yarg = { .ys = ys, }, }; + struct netdev_napi_get_rsp *rsp; + struct nlmsghdr *nlh; + int err; + + nlh = ynl_gemsg_start_req(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + yrs.yarg.rsp_policy = &netdev_napi_nest; + + if (req->_present.id) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_ID, req->id); + + rsp = calloc(1, sizeof(*rsp)); + yrs.yarg.data = rsp; + yrs.cb = netdev_napi_get_rsp_parse; + yrs.rsp_cmd = NETDEV_CMD_NAPI_GET; + + err = ynl_exec(ys, nlh, &yrs); + if (err < 0) + goto err_free; + + return rsp; + +err_free: + netdev_napi_get_rsp_free(rsp); + return NULL; +} + +/* NETDEV_CMD_NAPI_GET - dump */ +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp) +{ + struct netdev_napi_get_list *next = rsp; + + while ((void *)next != YNL_LIST_END) { + rsp = next; + next = rsp->next; + + free(rsp); + } +} + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req) +{ + struct ynl_dump_state yds = {}; + struct nlmsghdr *nlh; + int err; + + yds.ys = ys; + yds.alloc_sz = sizeof(struct netdev_napi_get_list); + yds.cb = netdev_napi_get_rsp_parse; + yds.rsp_cmd = NETDEV_CMD_NAPI_GET; + yds.rsp_policy = &netdev_napi_nest; + + nlh = ynl_gemsg_start_dump(ys, ys->family_id, NETDEV_CMD_NAPI_GET, 1); + ys->req_policy = &netdev_napi_nest; + + if (req->_present.ifindex) + mnl_attr_put_u32(nlh, NETDEV_A_NAPI_IFINDEX, req->ifindex); + + err = ynl_exec_dump(ys, nlh, &yds); + if (err < 0) + goto free_list; + + return yds.first; + +free_list: + netdev_napi_get_list_free(yds.first); + return NULL; +} + static const struct ynl_ntf_info netdev_ntf_info[] = { [NETDEV_CMD_DEV_ADD_NTF] = { .alloc_sz = sizeof(struct netdev_dev_get_ntf), diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index d7daf6df3df0..481c9e45b689 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -360,4 +360,79 @@ struct netdev_queue_get_list * netdev_queue_get_dump(struct ynl_sock *ys, struct netdev_queue_get_req_dump *req); +/* ============== NETDEV_CMD_NAPI_GET ============== */ +/* NETDEV_CMD_NAPI_GET - do */ +struct netdev_napi_get_req { + struct { + __u32 id:1; + } _present; + + __u32 id; +}; + +static inline struct netdev_napi_get_req *netdev_napi_get_req_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req)); +} +void netdev_napi_get_req_free(struct netdev_napi_get_req *req); + +static inline void +netdev_napi_get_req_set_id(struct netdev_napi_get_req *req, __u32 id) +{ + req->_present.id = 1; + req->id = id; +} + +struct netdev_napi_get_rsp { + struct { + __u32 id:1; + __u32 ifindex:1; + } _present; + + __u32 id; + __u32 ifindex; +}; + +void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); + +/* + * Get information about NAPI instances configured on the system. + */ +struct netdev_napi_get_rsp * +netdev_napi_get(struct ynl_sock *ys, struct netdev_napi_get_req *req); + +/* NETDEV_CMD_NAPI_GET - dump */ +struct netdev_napi_get_req_dump { + struct { + __u32 ifindex:1; + } _present; + + __u32 ifindex; +}; + +static inline struct netdev_napi_get_req_dump * +netdev_napi_get_req_dump_alloc(void) +{ + return calloc(1, sizeof(struct netdev_napi_get_req_dump)); +} +void netdev_napi_get_req_dump_free(struct netdev_napi_get_req_dump *req); + +static inline void +netdev_napi_get_req_dump_set_ifindex(struct netdev_napi_get_req_dump *req, + __u32 ifindex) +{ + req->_present.ifindex = 1; + req->ifindex = ifindex; +} + +struct netdev_napi_get_list { + struct netdev_napi_get_list *next; + struct netdev_napi_get_rsp obj __attribute__((aligned(8))); +}; + +void netdev_napi_get_list_free(struct netdev_napi_get_list *rsp); + +struct netdev_napi_get_list * +netdev_napi_get_dump(struct ynl_sock *ys, struct netdev_napi_get_req_dump *req); + #endif /* _LINUX_NETDEV_GEN_H */ -- cgit v1.3.1 From 5a5131d66fe02337de0b1b2e021b58f0f55c6df5 Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:02 -0800 Subject: netdev-genl: spec: Add irq in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for interrupt number among the NAPI attributes. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147334210.5260.18178387869057516983.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 5 +++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 15 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index 76d6b2e15b67..a3a1c6ad521b 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -226,6 +226,10 @@ attribute-sets: name: id doc: ID of the NAPI instance. type: u32 + - + name: irq + doc: The associated interrupt vector number for the napi + type: u32 - name: queue attributes: @@ -384,6 +388,7 @@ operations: attributes: - id - ifindex + - irq dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index e7bdbcb01f22..30fea409b71e 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -112,6 +112,7 @@ enum { enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, + NETDEV_A_NAPI_IRQ, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 906b61554698..58e5196da4bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -164,6 +164,7 @@ struct ynl_policy_nest netdev_queue_nest = { struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -210,6 +211,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.ifindex = 1; dst->ifindex = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_IRQ) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.irq = 1; + dst->irq = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 481c9e45b689..0c3224017c12 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -387,10 +387,12 @@ struct netdev_napi_get_rsp { struct { __u32 id:1; __u32 ifindex:1; + __u32 irq:1; } _present; __u32 id; __u32 ifindex; + __u32 irq; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.3.1 From 8481a249a0eaf0000dbb18f7689ccd50ea9835cd Mon Sep 17 00:00:00 2001 From: Amritha Nambiar Date: Fri, 1 Dec 2023 15:29:13 -0800 Subject: netdev-genl: spec: Add PID in netdev netlink YAML spec Add support in netlink spec(netdev.yaml) for PID of the NAPI thread. Add code generated from the spec. Signed-off-by: Amritha Nambiar Reviewed-by: Sridhar Samudrala Link: https://lore.kernel.org/r/170147335301.5260.11872351477120434501.stgit@anambiarhost.jf.intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/netdev.yaml | 7 +++++++ include/uapi/linux/netdev.h | 1 + tools/include/uapi/linux/netdev.h | 1 + tools/net/ynl/generated/netdev-user.c | 6 ++++++ tools/net/ynl/generated/netdev-user.h | 2 ++ 5 files changed, 17 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index a3a1c6ad521b..f2c76d103bd8 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -230,6 +230,12 @@ attribute-sets: name: irq doc: The associated interrupt vector number for the napi type: u32 + - + name: pid + doc: PID of the napi thread, if NAPI is configured to operate in + threaded mode. If NAPI is not in threaded mode (i.e. uses normal + softirq context), the attribute will be absent. + type: u32 - name: queue attributes: @@ -389,6 +395,7 @@ operations: - id - ifindex - irq + - pid dump: request: attributes: diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 30fea409b71e..424c5e28f495 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -113,6 +113,7 @@ enum { NETDEV_A_NAPI_IFINDEX = 1, NETDEV_A_NAPI_ID, NETDEV_A_NAPI_IRQ, + NETDEV_A_NAPI_PID, __NETDEV_A_NAPI_MAX, NETDEV_A_NAPI_MAX = (__NETDEV_A_NAPI_MAX - 1) diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 58e5196da4bd..ed8bcb855a1d 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -165,6 +165,7 @@ struct ynl_policy_attr netdev_napi_policy[NETDEV_A_NAPI_MAX + 1] = { [NETDEV_A_NAPI_IFINDEX] = { .name = "ifindex", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_ID] = { .name = "id", .type = YNL_PT_U32, }, [NETDEV_A_NAPI_IRQ] = { .name = "irq", .type = YNL_PT_U32, }, + [NETDEV_A_NAPI_PID] = { .name = "pid", .type = YNL_PT_U32, }, }; struct ynl_policy_nest netdev_napi_nest = { @@ -216,6 +217,11 @@ int netdev_page_pool_info_parse(struct ynl_parse_arg *yarg, return MNL_CB_ERROR; dst->_present.irq = 1; dst->irq = mnl_attr_get_u32(attr); + } else if (type == NETDEV_A_NAPI_PID) { + if (ynl_attr_validate(yarg, attr)) + return MNL_CB_ERROR; + dst->_present.pid = 1; + dst->pid = mnl_attr_get_u32(attr); } } diff --git a/tools/net/ynl/generated/netdev-user.h b/tools/net/ynl/generated/netdev-user.h index 0c3224017c12..3830cf2ab6b8 100644 --- a/tools/net/ynl/generated/netdev-user.h +++ b/tools/net/ynl/generated/netdev-user.h @@ -388,11 +388,13 @@ struct netdev_napi_get_rsp { __u32 id:1; __u32 ifindex:1; __u32 irq:1; + __u32 pid:1; } _present; __u32 id; __u32 ifindex; __u32 irq; + __u32 pid; }; void netdev_napi_get_rsp_free(struct netdev_napi_get_rsp *rsp); -- cgit v1.3.1 From e8a4195d843fe81a86a97db9ce830c78bfd3f19b Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:41 +0800 Subject: docs: bridge: update doc format to rst The current bridge kernel doc is too old. It only pointed to the linuxfoundation wiki page which lacks of the new features. Here let's start the new bridge document and put all the bridge info so new developers and users could catch up the last bridge status soon. In this patch, Convert the doc to rst format. Add bridge brief introduction, FAQ and contact info. Reviewed-by: Florian Fainelli Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 47 +++++++++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index c859f3c1636e..6ad8b42b2c50 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -4,18 +4,45 @@ Ethernet Bridging ================= -In order to use the Ethernet bridging functionality, you'll need the -userspace tools. +Introduction +============ -Documentation for Linux bridging is on: - https://wiki.linuxfoundation.org/networking/bridge +The IEEE 802.1Q-2022 (Bridges and Bridged Networks) standard defines the +operation of bridges in computer networks. A bridge, in the context of this +standard, is a device that connects two or more network segments and operates +at the data link layer (Layer 2) of the OSI (Open Systems Interconnection) +model. The purpose of a bridge is to filter and forward frames between +different segments based on the destination MAC (Media Access Control) address. -The bridge-utilities are maintained at: - git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git +FAQ +=== -Additionally, the iproute2 utilities can be used to configure -bridge devices. +What does a bridge do? +---------------------- -If you still have questions, don't hesitate to post to the mailing list -(more info https://lists.linux-foundation.org/mailman/listinfo/bridge). +A bridge transparently forwards traffic between multiple network interfaces. +In plain English this means that a bridge connects two or more physical +Ethernet networks, to form one larger (logical) Ethernet network. +Is it L3 protocol independent? +------------------------------ + +Yes. The bridge sees all frames, but it *uses* only L2 headers/information. +As such, the bridging functionality is protocol independent, and there should +be no trouble forwarding IPX, NetBEUI, IP, IPv6, etc. + +Contact Info +============ + +The code is currently maintained by Roopa Prabhu and +Nikolay Aleksandrov . Bridge bugs and enhancements +are discussed on the linux-netdev mailing list netdev@vger.kernel.org and +bridge@lists.linux-foundation.org. + +The list is open to anyone interested: http://vger.kernel.org/vger-lists.html#netdev + +External Links +============== + +The old Documentation for Linux bridging is on: +https://wiki.linuxfoundation.org/networking/bridge -- cgit v1.3.1 From bcc1f84e4d3480636d8ff7458bfdda9ff908a3d7 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:44 +0800 Subject: docs: bridge: Add kAPI/uAPI fields Add kAPI/uAPI field for bridge doc. Update struct net_bridge_vlan comments to fix doc build warning. Reviewed-by: Florian Fainelli Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 33 +++++++++++++++++++++++++++++++++ net/bridge/br_private.h | 2 ++ 2 files changed, 35 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index 6ad8b42b2c50..a717563eaa15 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -14,6 +14,39 @@ at the data link layer (Layer 2) of the OSI (Open Systems Interconnection) model. The purpose of a bridge is to filter and forward frames between different segments based on the destination MAC (Media Access Control) address. +Bridge kAPI +=========== + +Here are some core structures of bridge code. Note that the kAPI is *unstable*, +and can be changed at any time. + +.. kernel-doc:: net/bridge/br_private.h + :identifiers: net_bridge_vlan + +Bridge uAPI +=========== + +Modern Linux bridge uAPI is accessed via Netlink interface. You can find +below files where the bridge and bridge port netlink attributes are defined. + +Bridge netlink attributes +------------------------- + +.. kernel-doc:: include/uapi/linux/if_link.h + :doc: Bridge enum definition + +Bridge port netlink attributes +------------------------------ + +.. kernel-doc:: include/uapi/linux/if_link.h + :doc: Bridge port enum definition + +Bridge sysfs +------------ + +The sysfs interface is deprecated and should not be extended if new +options are added. + FAQ === diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 6b7f36769d03..051ea81864ac 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -186,6 +186,7 @@ enum { * struct net_bridge_vlan - per-vlan entry * * @vnode: rhashtable member + * @tnode: rhashtable member * @vid: VLAN id * @flags: bridge vlan flags * @priv_flags: private (in-kernel) bridge vlan flags @@ -196,6 +197,7 @@ enum { * @refcnt: if MASTER flag set, this is bumped for each port referencing it * @brvlan: if MASTER flag unset, this points to the global per-VLAN context * for this VLAN entry + * @tinfo: bridge tunnel info * @br_mcast_ctx: if MASTER flag set, this is the global vlan multicast context * @port_mcast_ctx: if MASTER flag unset, this is the per-port/vlan multicast * context -- cgit v1.3.1 From 567d2608209ffd694b19ee2c602d02aeae7fd16d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:45 +0800 Subject: docs: bridge: add STP doc Add STP part for bridge document. Reviewed-by: Florian Fainelli Signed-off-by: Hangbin Liu Acked-by: Nikolay Aleksandrov Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index a717563eaa15..c14410008ddf 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -47,6 +47,107 @@ Bridge sysfs The sysfs interface is deprecated and should not be extended if new options are added. +STP +=== + +The STP (Spanning Tree Protocol) implementation in the Linux bridge driver +is a critical feature that helps prevent loops and broadcast storms in +Ethernet networks by identifying and disabling redundant links. In a Linux +bridge context, STP is crucial for network stability and availability. + +STP is a Layer 2 protocol that operates at the Data Link Layer of the OSI +model. It was originally developed as IEEE 802.1D and has since evolved into +multiple versions, including Rapid Spanning Tree Protocol (RSTP) and +`Multiple Spanning Tree Protocol (MSTP) +`_. + +The 802.1D-2004 removed the original Spanning Tree Protocol, instead +incorporating the Rapid Spanning Tree Protocol (RSTP). By 2014, all the +functionality defined by IEEE 802.1D has been incorporated into either +IEEE 802.1Q (Bridges and Bridged Networks) or IEEE 802.1AC (MAC Service +Definition). 802.1D has been officially withdrawn in 2022. + +Bridge Ports and STP States +--------------------------- + +In the context of STP, bridge ports can be in one of the following states: + * Blocking: The port is disabled for data traffic and only listens for + BPDUs (Bridge Protocol Data Units) from other devices to determine the + network topology. + * Listening: The port begins to participate in the STP process and listens + for BPDUs. + * Learning: The port continues to listen for BPDUs and begins to learn MAC + addresses from incoming frames but does not forward data frames. + * Forwarding: The port is fully operational and forwards both BPDUs and + data frames. + * Disabled: The port is administratively disabled and does not participate + in the STP process. The data frames forwarding are also disabled. + +Root Bridge and Convergence +--------------------------- + +In the context of networking and Ethernet bridging in Linux, the root bridge +is a designated switch in a bridged network that serves as a reference point +for the spanning tree algorithm to create a loop-free topology. + +Here's how the STP works and root bridge is chosen: + 1. Bridge Priority: Each bridge running a spanning tree protocol, has a + configurable Bridge Priority value. The lower the value, the higher the + priority. By default, the Bridge Priority is set to a standard value + (e.g., 32768). + 2. Bridge ID: The Bridge ID is composed of two components: Bridge Priority + and the MAC address of the bridge. It uniquely identifies each bridge + in the network. The Bridge ID is used to compare the priorities of + different bridges. + 3. Bridge Election: When the network starts, all bridges initially assume + that they are the root bridge. They start advertising Bridge Protocol + Data Units (BPDU) to their neighbors, containing their Bridge ID and + other information. + 4. BPDU Comparison: Bridges exchange BPDUs to determine the root bridge. + Each bridge examines the received BPDUs, including the Bridge Priority + and Bridge ID, to determine if it should adjust its own priorities. + The bridge with the lowest Bridge ID will become the root bridge. + 5. Root Bridge Announcement: Once the root bridge is determined, it sends + BPDUs with information about the root bridge to all other bridges in the + network. This information is used by other bridges to calculate the + shortest path to the root bridge and, in doing so, create a loop-free + topology. + 6. Forwarding Ports: After the root bridge is selected and the spanning tree + topology is established, each bridge determines which of its ports should + be in the forwarding state (used for data traffic) and which should be in + the blocking state (used to prevent loops). The root bridge's ports are + all in the forwarding state. while other bridges have some ports in the + blocking state to avoid loops. + 7. Root Ports: After the root bridge is selected and the spanning tree + topology is established, each non-root bridge processes incoming + BPDUs and determines which of its ports provides the shortest path to the + root bridge based on the information in the received BPDUs. This port is + designated as the root port. And it is in the Forwarding state, allowing + it to actively forward network traffic. + 8. Designated ports: A designated port is the port through which the non-root + bridge will forward traffic towards the designated segment. Designated ports + are placed in the Forwarding state. All other ports on the non-root + bridge that are not designated for specific segments are placed in the + Blocking state to prevent network loops. + +STP ensures network convergence by calculating the shortest path and disabling +redundant links. When network topology changes occur (e.g., a link failure), +STP recalculates the network topology to restore connectivity while avoiding loops. + +Proper configuration of STP parameters, such as the bridge priority, can +influence network performance, path selection and which bridge becomes the +Root Bridge. + +User space STP helper +--------------------- + +The user space STP helper *bridge-stp* is a program to control whether to use +user mode spanning tree. The ``/sbin/bridge-stp `` is +called by the kernel when STP is enabled/disabled on a bridge +(via ``brctl stp `` or ``ip link set type bridge +stp_state <0|1>``). The kernel enables user_stp mode if that command returns +0, or enables kernel_stp mode if that command returns any other value. + FAQ === -- cgit v1.3.1 From 041a6ac4bf792eaf4c5898b3a744e98cfdc43a7a Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:46 +0800 Subject: docs: bridge: add VLAN doc Add VLAN part for bridge document. Acked-by: Nikolay Aleksandrov Reviewed-by: Florian Fainelli Reviewed-by: Vladimir Oltean Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index c14410008ddf..97936b9564fd 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -148,6 +148,35 @@ called by the kernel when STP is enabled/disabled on a bridge stp_state <0|1>``). The kernel enables user_stp mode if that command returns 0, or enables kernel_stp mode if that command returns any other value. +VLAN +==== + +A LAN (Local Area Network) is a network that covers a small geographic area, +typically within a single building or a campus. LANs are used to connect +computers, servers, printers, and other networked devices within a localized +area. LANs can be wired (using Ethernet cables) or wireless (using Wi-Fi). + +A VLAN (Virtual Local Area Network) is a logical segmentation of a physical +network into multiple isolated broadcast domains. VLANs are used to divide +a single physical LAN into multiple virtual LANs, allowing different groups of +devices to communicate as if they were on separate physical networks. + +Typically there are two VLAN implementations, IEEE 802.1Q and IEEE 802.1ad +(also known as QinQ). IEEE 802.1Q is a standard for VLAN tagging in Ethernet +networks. It allows network administrators to create logical VLANs on a +physical network and tag Ethernet frames with VLAN information, which is +called *VLAN-tagged frames*. IEEE 802.1ad, commonly known as QinQ or Double +VLAN, is an extension of the IEEE 802.1Q standard. QinQ allows for the +stacking of multiple VLAN tags within a single Ethernet frame. The Linux +bridge supports both the IEEE 802.1Q and `802.1AD +`_ +protocol for VLAN tagging. + +`VLAN filtering `_ +on a bridge is disabled by default. After enabling VLAN filtering on a bridge, +it will start forwarding frames to appropriate destinations based on their +destination MAC address and VLAN tag (both must match). + FAQ === -- cgit v1.3.1 From 75ceac88efb84c72b684d73a16c37842ca08fb14 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:47 +0800 Subject: docs: bridge: add multicast doc Add multicast part for bridge document. Acked-by: Nikolay Aleksandrov Reviewed-by: Florian Fainelli Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 56 +++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index 97936b9564fd..863ad2c8d146 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -177,6 +177,62 @@ on a bridge is disabled by default. After enabling VLAN filtering on a bridge, it will start forwarding frames to appropriate destinations based on their destination MAC address and VLAN tag (both must match). +Multicast +========= + +The Linux bridge driver has multicast support allowing it to process Internet +Group Management Protocol (IGMP) or Multicast Listener Discovery (MLD) +messages, and to efficiently forward multicast data packets. The bridge +driver supports IGMPv2/IGMPv3 and MLDv1/MLDv2. + +Multicast snooping +------------------ + +Multicast snooping is a networking technology that allows network switches +to intelligently manage multicast traffic within a local area network (LAN). + +The switch maintains a multicast group table, which records the association +between multicast group addresses and the ports where hosts have joined these +groups. The group table is dynamically updated based on the IGMP/MLD messages +received. With the multicast group information gathered through snooping, the +switch optimizes the forwarding of multicast traffic. Instead of blindly +broadcasting the multicast traffic to all ports, it sends the multicast +traffic based on the destination MAC address only to ports which have +subscribed the respective destination multicast group. + +When created, the Linux bridge devices have multicast snooping enabled by +default. It maintains a Multicast forwarding database (MDB) which keeps track +of port and group relationships. + +IGMPv3/MLDv2 EHT support +------------------------ + +The Linux bridge supports IGMPv3/MLDv2 EHT (Explicit Host Tracking), which +was added by `474ddb37fa3a ("net: bridge: multicast: add EHT allow/block handling") +`_ + +The explicit host tracking enables the device to keep track of each +individual host that is joined to a particular group or channel. The main +benefit of the explicit host tracking in IGMP is to allow minimal leave +latencies when a host leaves a multicast group or channel. + +The length of time between a host wanting to leave and a device stopping +traffic forwarding is called the IGMP leave latency. A device configured +with IGMPv3 or MLDv2 and explicit tracking can immediately stop forwarding +traffic if the last host to request to receive traffic from the device +indicates that it no longer wants to receive traffic. The leave latency +is thus bound only by the packet transmission latencies in the multiaccess +network and the processing time in the device. + +Other multicast features +------------------------ + +The Linux bridge also supports `per-VLAN multicast snooping +`_, +which is disabled by default but can be enabled. And `Multicast Router Discovery +`_, +which help identify the location of multicast routers. + FAQ === -- cgit v1.3.1 From 3c37f17d6ca9a2153486e2893f996a9f1525c410 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:48 +0800 Subject: docs: bridge: add switchdev doc Add switchdev part for bridge document. Reviewed-by: Vladimir Oltean Reviewed-by: Florian Fainelli Acked-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index 863ad2c8d146..e96af89cd061 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -233,6 +233,24 @@ which is disabled by default but can be enabled. And `Multicast Router Discovery `_, which help identify the location of multicast routers. +Switchdev +========= + +Linux Bridge Switchdev is a feature in the Linux kernel that extends the +capabilities of the traditional Linux bridge to work more efficiently with +hardware switches that support switchdev. With Linux Bridge Switchdev, certain +networking functions like forwarding, filtering, and learning of Ethernet +frames can be offloaded to a hardware switch. This offloading reduces the +burden on the Linux kernel and CPU, leading to improved network performance +and lower latency. + +To use Linux Bridge Switchdev, you need hardware switches that support the +switchdev interface. This means that the switch hardware needs to have the +necessary drivers and functionality to work in conjunction with the Linux +kernel. + +Please see the :ref:`switchdev` document for more details. + FAQ === -- cgit v1.3.1 From 1b1a4c7e82aeebef6bd68c94b0531aa72531f60c Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:49 +0800 Subject: docs: bridge: add netfilter doc Add netfilter part for bridge document. Reviewed-by: Florian Westphal Reviewed-by: Florian Fainelli Acked-by: Nikolay Aleksandrov Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index e96af89cd061..39ff8d126a04 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -251,6 +251,42 @@ kernel. Please see the :ref:`switchdev` document for more details. +Netfilter +========= + +The bridge netfilter module is a legacy feature that allows to filter bridged +packets with iptables and ip6tables. Its use is discouraged. Users should +consider using nftables for packet filtering. + +The older ebtables tool is more feature-limited compared to nftables, but +just like nftables it doesn't need this module either to function. + +The br_netfilter module intercepts packets entering the bridge, performs +minimal sanity tests on ipv4 and ipv6 packets and then pretends that +these packets are being routed, not bridged. br_netfilter then calls +the ip and ipv6 netfilter hooks from the bridge layer, i.e. ip(6)tables +rulesets will also see these packets. + +br_netfilter is also the reason for the iptables *physdev* match: +This match is the only way to reliably tell routed and bridged packets +apart in an iptables ruleset. + +Note that ebtables and nftables will work fine without the br_netfilter module. +iptables/ip6tables/arptables do not work for bridged traffic because they +plug in the routing stack. nftables rules in ip/ip6/inet/arp families won't +see traffic that is forwarded by a bridge either, but that's very much how it +should be. + +Historically the feature set of ebtables was very limited (it still is), +this module was added to pretend packets are routed and invoke the ipv4/ipv6 +netfilter hooks from the bridge so users had access to the more feature-rich +iptables matching capabilities (including conntrack). nftables doesn't have +this limitation, pretty much all features work regardless of the protocol family. + +So, br_netfilter is only needed if users, for some reason, need to use +ip(6)tables to filter packets forwarded by the bridge, or NAT bridged +traffic. For pure link layer filtering, this module isn't needed. + FAQ === -- cgit v1.3.1 From d2afc2cd7f1f46500e7ca830c453266f365ee43d Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Fri, 1 Dec 2023 16:19:50 +0800 Subject: docs: bridge: add other features Add some features that are not appropriate for the existing section to the "Others" part of the bridge document. Acked-by: Nikolay Aleksandrov Reviewed-by: Florian Fainelli Signed-off-by: Hangbin Liu Signed-off-by: Paolo Abeni --- Documentation/networking/bridge.rst | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index 39ff8d126a04..ba14e7b07869 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -287,6 +287,20 @@ So, br_netfilter is only needed if users, for some reason, need to use ip(6)tables to filter packets forwarded by the bridge, or NAT bridged traffic. For pure link layer filtering, this module isn't needed. +Other Features +============== + +The Linux bridge also supports `IEEE 802.11 Proxy ARP +`_, +`Media Redundancy Protocol (MRP) +`_, +`Media Redundancy Protocol (MRP) LC mode +`_, +`IEEE 802.1X port authentication +`_, +and `MAC Authentication Bypass (MAB) +`_. + FAQ === -- cgit v1.3.1 From b86455a1cbef6829e8da3f93d37a233be2616569 Mon Sep 17 00:00:00 2001 From: Arkadiusz Kubalewski Date: Fri, 1 Dec 2023 10:08:40 -0800 Subject: ice: add CGU info to devlink info callback If Clock Generation Unit is present on NIC board user shall know its details. Provide the devlink info callback with a new: - fixed type object (cgu.id) indicating hardware variant of onboard CGU, - running type object (fw.cgu) consisting of CGU id, config and firmware versions. These information shall be known for debugging purposes. Test (on NIC board with CGU) $ devlink dev info / | grep cgu cgu.id 36 fw.cgu 8032.16973825.6021 Test (on NIC board without CGU) $ devlink dev info / | grep cgu -c 0 Reviewed-by: Larysa Zaremba Signed-off-by: Arkadiusz Kubalewski Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Tony Nguyen Signed-off-by: Paolo Abeni --- Documentation/networking/devlink/ice.rst | 9 +++++++++ drivers/net/ethernet/intel/ice/ice_devlink.c | 20 ++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst index 2f60e34ab926..7f30ebd5debb 100644 --- a/Documentation/networking/devlink/ice.rst +++ b/Documentation/networking/devlink/ice.rst @@ -38,6 +38,10 @@ The ``ice`` driver reports the following versions - fixed - K65390-000 - The Product Board Assembly (PBA) identifier of the board. + * - ``cgu.id`` + - fixed + - 36 + - The Clock Generation Unit (CGU) hardware revision identifier. * - ``fw.mgmt`` - running - 2.1.7 @@ -104,6 +108,11 @@ The ``ice`` driver reports the following versions - running - 0xee16ced7 - The first 4 bytes of the hash of the netlist module contents. + * - ``fw.cgu`` + - running + - 8032.16973825.6021 + - The version of Clock Generation Unit (CGU). Format: + ... Flash Update ============ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index f4e24d11ebd0..65be56f2af9e 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -193,6 +193,24 @@ ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); } +static void ice_info_cgu_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + u32 id, cfg_ver, fw_ver; + + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + if (ice_aq_get_cgu_info(&pf->hw, &id, &cfg_ver, &fw_ver)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", id, cfg_ver, fw_ver); +} + +static void ice_info_cgu_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +{ + if (!ice_is_feature_supported(pf, ICE_F_CGU)) + return; + snprintf(ctx->buf, sizeof(ctx->buf), "%u", pf->hw.cgu_part_number); +} + #define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } #define running(key, getter) { ICE_VERSION_RUNNING, key, getter, NULL } #define stored(key, getter, fallback) { ICE_VERSION_STORED, key, getter, fallback } @@ -235,6 +253,8 @@ static const struct ice_devlink_version { running("fw.app.bundle_id", ice_info_ddp_pkg_bundle_id), combined("fw.netlist", ice_info_netlist_ver, ice_info_pending_netlist_ver), combined("fw.netlist.build", ice_info_netlist_build, ice_info_pending_netlist_build), + fixed("cgu.id", ice_info_cgu_id), + running("fw.cgu", ice_info_cgu_fw_build), }; /** -- cgit v1.3.1 From 5c399ae080ae507954f6f2efefc7349f8ed0e051 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 4 Dec 2023 09:42:31 -0800 Subject: xsk: Add missing SPDX to AF_XDP TX metadata documentation Not sure how I missed that. I even acknowledged it explicitly in the changelog [0]. Add the tag for real now. [0] https://lore.kernel.org/bpf/20231127190319.1190813-1-sdf@google.com/ Fixes: 11614723af26 ("xsk: Add option to calculate TX checksum in SW") Suggested-by: Simon Horman Signed-off-by: Stanislav Fomichev Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20231204174231.3457705-1-sdf@google.com --- Documentation/networking/xsk-tx-metadata.rst | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/xsk-tx-metadata.rst b/Documentation/networking/xsk-tx-metadata.rst index 97ecfa480d00..bd033fe95cca 100644 --- a/Documentation/networking/xsk-tx-metadata.rst +++ b/Documentation/networking/xsk-tx-metadata.rst @@ -1,3 +1,5 @@ +.. SPDX-License-Identifier: GPL-2.0 + ================== AF_XDP TX Metadata ================== -- cgit v1.3.1 From 19b707c3f23a7923ab40732521123d9b59965cc4 Mon Sep 17 00:00:00 2001 From: Coco Li Date: Mon, 4 Dec 2023 22:07:28 +0000 Subject: Documentations: fix net_cachelines documentation build warning Original errors: Documentation/networking/net_cachelines/index.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/inet_connection_sock.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/inet_sock.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/net_device.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/snmp.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Documentation/networking/net_cachelines/tcp_sock.rst:3: WARNING: Explicit markup ends without a blank line; unexpected unindent. Fixes: 14006f1d8fa2 ("Documentations: Analyze heavily used Networking related structs") Signed-off-by: Coco Li Link: https://lore.kernel.org/r/20231204220728.746134-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/index.rst | 1 + Documentation/networking/net_cachelines/inet_connection_sock.rst | 1 + Documentation/networking/net_cachelines/inet_sock.rst | 1 + Documentation/networking/net_cachelines/net_device.rst | 1 + Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst | 1 + Documentation/networking/net_cachelines/snmp.rst | 1 + Documentation/networking/net_cachelines/tcp_sock.rst | 1 + 7 files changed, 7 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/net_cachelines/index.rst b/Documentation/networking/net_cachelines/index.rst index 6f1a99989bbd..2669e4cda086 100644 --- a/Documentation/networking/net_cachelines/index.rst +++ b/Documentation/networking/net_cachelines/index.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + =================================== Common Networking Struct Cachelines =================================== diff --git a/Documentation/networking/net_cachelines/inet_connection_sock.rst b/Documentation/networking/net_cachelines/inet_connection_sock.rst index ad2b200147a6..7a911dc95652 100644 --- a/Documentation/networking/net_cachelines/inet_connection_sock.rst +++ b/Documentation/networking/net_cachelines/inet_connection_sock.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + ===================================================== inet_connection_sock struct fast path usage breakdown ===================================================== diff --git a/Documentation/networking/net_cachelines/inet_sock.rst b/Documentation/networking/net_cachelines/inet_sock.rst index 4077febdb995..a2babd0d7954 100644 --- a/Documentation/networking/net_cachelines/inet_sock.rst +++ b/Documentation/networking/net_cachelines/inet_sock.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + ===================================================== inet_connection_sock struct fast path usage breakdown ===================================================== diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 98508aa4f800..6cab1b797739 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + =========================================== net_device struct fast path usage breakdown =========================================== diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst index 47e1a0105384..9b87089a84c6 100644 --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + =========================================== netns_ipv4 struct fast path usage breakdown =========================================== diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst index 1eaeb8bc252c..6a071538566c 100644 --- a/Documentation/networking/net_cachelines/snmp.rst +++ b/Documentation/networking/net_cachelines/snmp.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + =========================================== netns_ipv4 enum fast path usage breakdown =========================================== diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index 802ad22015d7..97d7a5c8e01c 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -1,5 +1,6 @@ .. SPDX-License-Identifier: GPL-2.0 .. Copyright (C) 2023 Google LLC + ========================================= tcp_sock struct fast path usage breakdown ========================================= -- cgit v1.3.1 From 8e3bfaab2ad956aa551a3cf8f7108f4b2c3b18d0 Mon Sep 17 00:00:00 2001 From: Ante Knezic Date: Tue, 5 Dec 2023 11:03:38 +0100 Subject: dt-bindings: net: microchip,ksz: document microchip,rmii-clk-internal Add documentation for selecting reference rmii clock on KSZ88X3 devices Signed-off-by: Ante Knezic Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/microchip,ksz.yaml | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml index b3029c64d0d5..c963dc09e8e1 100644 --- a/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml +++ b/Documentation/devicetree/bindings/net/dsa/microchip,ksz.yaml @@ -11,7 +11,6 @@ maintainers: - Woojung Huh allOf: - - $ref: dsa.yaml#/$defs/ethernet-ports - $ref: /schemas/spi/spi-peripheral-props.yaml# properties: @@ -78,6 +77,39 @@ required: - compatible - reg +if: + not: + properties: + compatible: + enum: + - microchip,ksz8863 + - microchip,ksz8873 +then: + $ref: dsa.yaml#/$defs/ethernet-ports +else: + patternProperties: + "^(ethernet-)?ports$": + patternProperties: + "^(ethernet-)?port@[0-2]$": + $ref: dsa-port.yaml# + unevaluatedProperties: false + properties: + microchip,rmii-clk-internal: + $ref: /schemas/types.yaml#/definitions/flag + description: + When ksz88x3 is acting as clock provier (via REFCLKO) it + can select between internal and external RMII reference + clock. Internal reference clock means that the clock for + the RMII of ksz88x3 is provided by the ksz88x3 internally + and the REFCLKI pin is unconnected. For the external + reference clock, the clock needs to be fed back to ksz88x3 + via REFCLKI. + If microchip,rmii-clk-internal is set, ksz88x3 will provide + rmii reference clock internally, otherwise reference clock + should be provided externally. + dependencies: + microchip,rmii-clk-internal: [ethernet] + unevaluatedProperties: false examples: -- cgit v1.3.1 From a6de18f310a511278c1ff16b96eb2d500eada725 Mon Sep 17 00:00:00 2001 From: David Vernet Date: Thu, 7 Dec 2023 15:08:42 -0600 Subject: bpf: Add bpf_cpumask_weight() kfunc It can be useful to query how many bits are set in a cpumask. For example, if you want to perform special logic for the last remaining core that's set in a mask. Let's therefore add a new bpf_cpumask_weight() kfunc which checks how many bits are set in a mask. Signed-off-by: David Vernet Acked-by: Yonghong Song Link: https://lore.kernel.org/r/20231207210843.168466-2-void@manifault.com Signed-off-by: Alexei Starovoitov --- Documentation/bpf/cpumasks.rst | 2 +- kernel/bpf/cpumask.c | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/bpf/cpumasks.rst b/Documentation/bpf/cpumasks.rst index a22b6ad105fb..b5d47a04da5d 100644 --- a/Documentation/bpf/cpumasks.rst +++ b/Documentation/bpf/cpumasks.rst @@ -352,7 +352,7 @@ can be used to query the contents of cpumasks. .. kernel-doc:: kernel/bpf/cpumask.c :identifiers: bpf_cpumask_first bpf_cpumask_first_zero bpf_cpumask_first_and - bpf_cpumask_test_cpu + bpf_cpumask_test_cpu bpf_cpumask_weight .. kernel-doc:: kernel/bpf/cpumask.c :identifiers: bpf_cpumask_equal bpf_cpumask_intersects bpf_cpumask_subset diff --git a/kernel/bpf/cpumask.c b/kernel/bpf/cpumask.c index e01c741e54e7..7499b7d8c06f 100644 --- a/kernel/bpf/cpumask.c +++ b/kernel/bpf/cpumask.c @@ -405,6 +405,17 @@ __bpf_kfunc u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, return cpumask_any_and_distribute(src1, src2); } +/** + * bpf_cpumask_weight() - Return the number of bits in @cpumask. + * @cpumask: The cpumask being queried. + * + * Count the number of set bits in the given cpumask. + */ +__bpf_kfunc u32 bpf_cpumask_weight(const struct cpumask *cpumask) +{ + return cpumask_weight(cpumask); +} + __bpf_kfunc_end_defs(); BTF_SET8_START(cpumask_kfunc_btf_ids) @@ -432,6 +443,7 @@ BTF_ID_FLAGS(func, bpf_cpumask_full, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_copy, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_distribute, KF_RCU) BTF_ID_FLAGS(func, bpf_cpumask_any_and_distribute, KF_RCU) +BTF_ID_FLAGS(func, bpf_cpumask_weight, KF_RCU) BTF_SET8_END(cpumask_kfunc_btf_ids) static const struct btf_kfunc_id_set cpumask_kfunc_set = { -- cgit v1.3.1 From 68c84289bcc0dc75c1d27caccb55134e8d39dcc2 Mon Sep 17 00:00:00 2001 From: Swarup Laxman Kotiaklapudi Date: Fri, 8 Dec 2023 23:55:15 +0530 Subject: netlink: specs: devlink: add some(not all) missing attributes in devlink.yaml Add some missing(not all) attributes in devlink.yaml. Signed-off-by: Swarup Laxman Kotiaklapudi Suggested-by: Jiri Pirko Reviewed-by: Jiri Pirko Link: https://lore.kernel.org/r/20231208182515.1206616-1-swarupkotikalapudi@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/devlink.yaml | 377 +++++++++++++++++++++++-------- 1 file changed, 288 insertions(+), 89 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 43067e1f63aa..c3a438197964 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -75,6 +75,14 @@ definitions: name: ipsec-crypto-bit - name: ipsec-packet-bit + - + type: enum + name: rate-type + entries: + - + name: leaf + - + name: node - type: enum name: sb-threshold-type @@ -111,6 +119,16 @@ definitions: name: none - name: basic + - + type: enum + name: dpipe-header-id + entries: + - + name: ethernet + - + name: ipv4 + - + name: ipv6 - type: enum name: dpipe-match-type @@ -174,6 +192,16 @@ definitions: name: trap - name: mirror + - + type: enum + name: trap-type + entries: + - + name: drop + - + name: exception + - + name: control attribute-sets: - @@ -194,27 +222,45 @@ attribute-sets: name: port-type type: u16 enum: port-type - - # TODO: fill in the attributes in between - + - + name: port-desired-type + type: u16 + - + name: port-netdev-ifindex + type: u32 + - + name: port-netdev-name + type: string + - + name: port-ibdev-name + type: string - name: port-split-count type: u32 - value: 9 - - # TODO: fill in the attributes in between - + - + name: port-split-group + type: u32 - name: sb-index type: u32 - value: 11 - - # TODO: fill in the attributes in between - + - + name: sb-size + type: u32 + - + name: sb-ingress-pool-count + type: u16 + - + name: sb-egress-pool-count + type: u16 + - + name: sb-ingress-tc-count + type: u16 + - + name: sb-egress-tc-count + type: u16 - name: sb-pool-index type: u16 - value: 17 - name: sb-pool-type type: u8 @@ -232,16 +278,16 @@ attribute-sets: - name: sb-tc-index type: u16 - value: 22 - - # TODO: fill in the attributes in between - + - + name: sb-occ-cur + type: u32 + - + name: sb-occ-max + type: u32 - name: eswitch-mode type: u16 - value: 25 enum: eswitch-mode - - name: eswitch-inline-mode type: u16 @@ -347,6 +393,7 @@ attribute-sets: - name: dpipe-header-id type: u32 + enum: dpipe-header-id - name: dpipe-header-fields type: nest @@ -381,7 +428,6 @@ attribute-sets: - name: eswitch-encap-mode type: u8 - value: 62 enum: eswitch-encap-mode - name: resource-list @@ -433,20 +479,25 @@ attribute-sets: name: port-flavour type: u16 enum: port-flavour - - # TODO: fill in the attributes in between - + - + name: port-number + type: u32 + - + name: port-split-subport-number + type: u32 + - + name: param + type: nest + nested-attributes: dl-param - name: param-name type: string - value: 81 - - # TODO: fill in the attributes in between - + - + name: param-generic + type: flag - name: param-type type: u8 - value: 83 # TODO: fill in the attributes in between @@ -458,20 +509,34 @@ attribute-sets: - name: region-name type: string - - # TODO: fill in the attributes in between - + - + name: region-size + type: u64 + - + name: region-snapshots + type: nest + nested-attributes: dl-region-snapshots + - + name: region-snapshot + type: nest + nested-attributes: dl-region-snapshot - name: region-snapshot-id type: u32 - value: 92 - - # TODO: fill in the attributes in between - + - + name: region-chunks + type: nest + nested-attributes: dl-region-chunks + - + name: region-chunk + type: nest + nested-attributes: dl-region-chunk + - + name: region-chunk-data + type: binary - name: region-chunk-addr type: u64 - value: 96 - name: region-chunk-len type: u64 @@ -502,14 +567,13 @@ attribute-sets: - name: info-version-value type: string - - # TODO: fill in the attributes in between - + - + name: sb-pool-cell-size + type: u32 - name: fmsg type: nest nested-attributes: dl-fmsg - value: 106 - name: fmsg-obj-nest-start type: flag @@ -525,20 +589,35 @@ attribute-sets: - name: fmsg-obj-name type: string + - + name: fmsg-obj-value-type + type: u8 # TODO: fill in the attributes in between + - + name: health-reporter + type: nest + value: 114 + nested-attributes: dl-health-reporter - name: health-reporter-name type: string - value: 115 - - # TODO: fill in the attributes in between - + - + name: health-reporter-state + type: u8 + - + name: health-reporter-err-count + type: u64 + - + name: health-reporter-recover-count + type: u64 + - + name: health-reporter-dump-ts + type: u64 - name: health-reporter-graceful-period type: u64 - value: 120 - name: health-reporter-auto-recover type: u8 @@ -548,55 +627,64 @@ attribute-sets: - name: flash-update-component type: string - - # TODO: fill in the attributes in between - + - + name: flash-update-status-msg + type: string + - + name: flash-update-status-done + type: u64 + - + name: flash-update-status-total + type: u64 - name: port-pci-pf-number type: u16 - value: 127 - - # TODO: fill in the attributes in between - + - + name: port-pci-vf-number + type: u16 + - + name: stats + type: nest + nested-attributes: dl-attr-stats - name: trap-name type: string - value: 130 - name: trap-action type: u8 enum: trap-action - - # TODO: fill in the attributes in between - + - + name: trap-type + type: u8 + enum: trap-type + - + name: trap-generic + type: flag + - + name: trap-metadata + type: nest + nested-attributes: dl-trap-metadata - name: trap-group-name type: string - value: 135 - - name: reload-failed type: u8 - - # TODO: fill in the attributes in between - + - + name: health-reporter-dump-ts-ns + type: u64 - name: netns-fd type: u32 - value: 138 - name: netns-pid type: u32 - name: netns-id type: u32 - - # TODO: fill in the attributes in between - - name: health-reporter-auto-dump type: u8 - value: 141 - name: trap-policer-id type: u32 @@ -610,22 +698,29 @@ attribute-sets: name: port-function type: nest nested-attributes: dl-port-function - - # TODO: fill in the attributes in between - + - + name: info-board-serial-number + type: string + - + name: port-lanes + type: u32 + - + name: port-splittable + type: u8 + - + name: port-external + type: u8 - name: port-controller-number type: u32 - value: 150 - - # TODO: fill in the attributes in between - + - + name: flash-update-status-timeout + type: u64 - name: flash-update-overwrite-mask type: bitfield32 enum: flash-overwrite enum-as-flags: True - value: 152 - name: reload-action type: u8 @@ -673,20 +768,16 @@ attribute-sets: type: nest multi-attr: true nested-attributes: dl-reload-act-stats - - # TODO: fill in the attributes in between - - name: port-pci-sf-number type: u32 - value: 164 - - # TODO: fill in the attributes in between - + - + name: rate-type + type: u16 + enum: rate-type - name: rate-tx-share type: u64 - value: 166 - name: rate-tx-max type: u64 @@ -696,20 +787,22 @@ attribute-sets: - name: rate-parent-node-name type: string - - # TODO: fill in the attributes in between - + - + name: region-max-snapshots + type: u32 - name: linecard-index type: u32 - value: 171 - - # TODO: fill in the attributes in between - + - + name: linecard-state + type: u8 - name: linecard-type type: string - value: 173 + - + name: linecard-supported-types + type: nest + nested-attributes: dl-linecard-supported-types # TODO: fill in the attributes in between @@ -736,12 +829,14 @@ attribute-sets: name: reload-stats - name: remote-reload-stats + - name: dl-reload-stats subset-of: devlink attributes: - name: reload-action-info + - name: dl-reload-act-info subset-of: devlink @@ -750,12 +845,14 @@ attribute-sets: name: reload-action - name: reload-action-stats + - name: dl-reload-act-stats subset-of: devlink attributes: - name: reload-stats-entry + - name: dl-reload-stats-entry subset-of: devlink @@ -764,6 +861,7 @@ attribute-sets: name: reload-stats-limit - name: reload-stats-value + - name: dl-info-version subset-of: devlink @@ -772,6 +870,7 @@ attribute-sets: name: info-version-name - name: info-version-value + - name: dl-port-function name-prefix: devlink-port-fn-attr- @@ -1005,6 +1104,49 @@ attribute-sets: - name: resource + - + name: dl-param + subset-of: devlink + attributes: + - + name: param-name + - + name: param-generic + - + name: param-type + + # TODO: fill in the attribute param-value-list + + - + name: dl-region-snapshots + subset-of: devlink + attributes: + - + name: region-snapshot + + - + name: dl-region-snapshot + subset-of: devlink + attributes: + - + name: region-snapshot-id + + - + name: dl-region-chunks + subset-of: devlink + attributes: + - + name: region-chunk + + - + name: dl-region-chunk + subset-of: devlink + attributes: + - + name: region-chunk-data + - + name: region-chunk-addr + - name: dl-fmsg subset-of: devlink @@ -1020,6 +1162,62 @@ attribute-sets: - name: fmsg-obj-name + - + name: dl-health-reporter + subset-of: devlink + attributes: + - + name: health-reporter-name + - + name: health-reporter-state + - + name: health-reporter-err-count + - + name: health-reporter-recover-count + - + name: health-reporter-graceful-period + - + name: health-reporter-auto-recover + - + name: health-reporter-dump-ts + - + name: health-reporter-dump-ts-ns + - + name: health-reporter-auto-dump + + - + name: dl-attr-stats + name-prefix: devlink-attr- + attributes: + - name: stats-rx-packets + type: u64 + value: 0 + - + name: stats-rx-bytes + type: u64 + - + name: stats-rx-dropped + type: u64 + + - + name: dl-trap-metadata + name-prefix: devlink-attr- + attributes: + - + name: trap-metadata-type-in-port + type: flag + value: 0 + - + name: trap-metadata-type-fa-cookie + type: flag + + - + name: dl-linecard-supported-types + subset-of: devlink + attributes: + - + name: linecard-type + - name: dl-selftest-id name-prefix: devlink-attr-selftest-id- @@ -1077,6 +1275,7 @@ operations: reply: value: 3 # due to a bug, port dump returns DEVLINK_CMD_NEW attributes: *port-id-attrs + - name: port-set doc: Set devlink port instances. -- cgit v1.3.1 From 766f5f900f156655c04230403fffdfb169a511ed Mon Sep 17 00:00:00 2001 From: Herve Codina Date: Tue, 28 Nov 2023 14:25:31 +0100 Subject: dt-bindings: net: Add the Lantiq PEF2256 E1/T1/J1 framer The Lantiq PEF2256 is a framer and line interface component designed to fulfill all required interfacing between an analog E1/T1/J1 line and the digital PCM system highway/H.100 bus. Signed-off-by: Herve Codina Reviewed-by: Christophe Leroy Reviewed-by: Rob Herring Link: https://lore.kernel.org/r/20231128132534.258459-3-herve.codina@bootlin.com Signed-off-by: Linus Walleij --- .../devicetree/bindings/net/lantiq,pef2256.yaml | 213 +++++++++++++++++++++ 1 file changed, 213 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/lantiq,pef2256.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/lantiq,pef2256.yaml b/Documentation/devicetree/bindings/net/lantiq,pef2256.yaml new file mode 100644 index 000000000000..7da8370e2468 --- /dev/null +++ b/Documentation/devicetree/bindings/net/lantiq,pef2256.yaml @@ -0,0 +1,213 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/lantiq,pef2256.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Lantiq PEF2256 + +maintainers: + - Herve Codina + +description: + The Lantiq PEF2256, also known as Infineon PEF2256 or FALC56, is a framer and + line interface component designed to fulfill all required interfacing between + an analog E1/T1/J1 line and the digital PCM system highway/H.100 bus. + +properties: + compatible: + items: + - const: lantiq,pef2256 + + reg: + maxItems: 1 + + clocks: + items: + - description: Master Clock + - description: System Clock Receive + - description: System Clock Transmit + + clock-names: + items: + - const: mclk + - const: sclkr + - const: sclkx + + interrupts: + maxItems: 1 + + reset-gpios: + description: + GPIO used to reset the device. + maxItems: 1 + + pinctrl: + $ref: /schemas/pinctrl/pinctrl.yaml# + additionalProperties: false + + patternProperties: + '-pins$': + type: object + $ref: /schemas/pinctrl/pinmux-node.yaml# + additionalProperties: false + + properties: + pins: + enum: [ RPA, RPB, RPC, RPD, XPA, XPB, XPC, XPD ] + + function: + enum: [ SYPR, RFM, RFMB, RSIGM, RSIG, DLR, FREEZE, RFSP, LOS, + SYPX, XFMS, XSIG, TCLK, XMFB, XSIGM, DLX, XCLK, XLT, + GPI, GPOH, GPOL ] + + required: + - pins + - function + + lantiq,data-rate-bps: + enum: [2048000, 4096000, 8192000, 16384000] + default: 2048000 + description: + Data rate (bit per seconds) on the system highway. + + lantiq,clock-falling-edge: + $ref: /schemas/types.yaml#/definitions/flag + description: + Data is sent on falling edge of the clock (and received on the rising + edge). If 'clock-falling-edge' is not present, data is sent on the + rising edge (and received on the falling edge). + + lantiq,channel-phase: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [0, 1, 2, 3, 4, 5, 6, 7] + default: 0 + description: | + The pef2256 delivers a full frame (32 8-bit time-slots in E1 and 24 8-bit + time-slots 8 8-bit signaling in E1/J1) every 125us. This lead to a data + rate of 2048000 bit/s. When lantiq,data-rate-bps is more than 2048000 + bit/s, the data (all 32 8-bit) present in the frame are interleave with + unused time-slots. The lantiq,channel-phase property allows to set the + correct alignment of the interleave mechanism. + For instance, suppose lantiq,data-rate-bps = 8192000 (ie 4*2048000), and + lantiq,channel-phase = 2, the interleave schema with unused time-slots + (nu) and used time-slots (XX) for TSi is + nu nu XX nu nu nu XX nu nu nu XX nu + <-- TSi --> <- TSi+1 -> <- TSi+2 -> + With lantiq,data-rate-bps = 8192000, and lantiq,channel-phase = 1, the + interleave schema is + nu XX nu nu nu XX nu nu nu XX nu nu + <-- TSi --> <- TSi+1 -> <- TSi+2 -> + With lantiq,data-rate-bps = 4096000 (ie 2*2048000), and + lantiq,channel-phase = 1, the interleave schema is + nu XX nu XX nu XX + <-- TSi --> <- TSi+1 -> <- TSi+2 -> + +patternProperties: + '^codec(-([0-9]|[1-2][0-9]|3[0-1]))?$': + type: object + $ref: /schemas/sound/dai-common.yaml + unevaluatedProperties: false + description: + Codec provided by the pef2256. This codec allows to use some of the PCM + system highway time-slots as audio channels to transport audio data over + the E1/T1/J1 lines. + The time-slots used by the codec must be set and so, the properties + 'dai-tdm-slot-num', 'dai-tdm-slot-width', 'dai-tdm-slot-tx-mask' and + 'dai-tdm-slot-rx-mask' must be present in the sound card node for + sub-nodes that involve the codec. The codec uses 8-bit time-slots. + 'dai-tdm-tdm-slot-with' must be set to 8. + The tx and rx masks define the pef2256 time-slots assigned to the codec. + + properties: + compatible: + const: lantiq,pef2256-codec + + '#sound-dai-cells': + const: 0 + + required: + - compatible + - '#sound-dai-cells' + +required: + - compatible + - reg + - clocks + - clock-names + - interrupts + +additionalProperties: false + +examples: + - | + #include + #include + + pef2256: framer@2000000 { + compatible = "lantiq,pef2256"; + reg = <0x2000000 0x100>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; + interrupt-parent = <&intc>; + clocks = <&clk_mclk>, <&clk_sclkr>, <&clk_sclkx>; + clock-names = "mclk", "sclkr", "sclkx"; + reset-gpios = <&gpio 11 GPIO_ACTIVE_LOW>; + lantiq,data-rate-bps = <4096000>; + + pinctrl { + pef2256_rpa_sypr: rpa-pins { + pins = "RPA"; + function = "SYPR"; + }; + pef2256_xpa_sypx: xpa-pins { + pins = "XPA"; + function = "SYPX"; + }; + }; + + pef2256_codec0: codec-0 { + compatible = "lantiq,pef2256-codec"; + #sound-dai-cells = <0>; + sound-name-prefix = "PEF2256_0"; + }; + + pef2256_codec1: codec-1 { + compatible = "lantiq,pef2256-codec"; + #sound-dai-cells = <0>; + sound-name-prefix = "PEF2256_1"; + }; + }; + + sound { + compatible = "simple-audio-card"; + #address-cells = <1>; + #size-cells = <0>; + simple-audio-card,dai-link@0 { /* CPU DAI1 - pef2256 codec 1 */ + reg = <0>; + cpu { + sound-dai = <&cpu_dai1>; + }; + codec { + sound-dai = <&pef2256_codec0>; + dai-tdm-slot-num = <4>; + dai-tdm-slot-width = <8>; + /* TS 1, 2, 3, 4 */ + dai-tdm-slot-tx-mask = <0 1 1 1 1>; + dai-tdm-slot-rx-mask = <0 1 1 1 1>; + }; + }; + simple-audio-card,dai-link@1 { /* CPU DAI2 - pef2256 codec 2 */ + reg = <1>; + cpu { + sound-dai = <&cpu_dai2>; + }; + codec { + sound-dai = <&pef2256_codec1>; + dai-tdm-slot-num = <4>; + dai-tdm-slot-width = <8>; + /* TS 5, 6, 7, 8 */ + dai-tdm-slot-tx-mask = <0 0 0 0 0 1 1 1 1>; + dai-tdm-slot-rx-mask = <0 0 0 0 0 1 1 1 1>; + }; + }; + }; -- cgit v1.3.1 From e6795330f88b4f643c649a02662d47b779340535 Mon Sep 17 00:00:00 2001 From: Larysa Zaremba Date: Tue, 5 Dec 2023 22:08:38 +0100 Subject: xdp: Add VLAN tag hint Implement functionality that enables drivers to expose VLAN tag to XDP code. VLAN tag is represented by 2 variables: - protocol ID, which is passed to bpf code in BE - VLAN TCI, in host byte order Acked-by: Stanislav Fomichev Signed-off-by: Larysa Zaremba Acked-by: Jesper Dangaard Brouer Link: https://lore.kernel.org/r/20231205210847.28460-10-larysa.zaremba@intel.com Signed-off-by: Alexei Starovoitov --- Documentation/netlink/specs/netdev.yaml | 4 ++++ Documentation/networking/xdp-rx-metadata.rst | 8 ++++++- include/net/xdp.h | 6 +++++ include/uapi/linux/netdev.h | 3 +++ net/core/xdp.c | 33 ++++++++++++++++++++++++++++ tools/include/uapi/linux/netdev.h | 3 +++ tools/net/ynl/generated/netdev-user.c | 1 + 7 files changed, 57 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/netdev.yaml b/Documentation/netlink/specs/netdev.yaml index eef6358ec587..aeec090e1387 100644 --- a/Documentation/netlink/specs/netdev.yaml +++ b/Documentation/netlink/specs/netdev.yaml @@ -54,6 +54,10 @@ definitions: name: hash doc: Device is capable of exposing receive packet hash via bpf_xdp_metadata_rx_hash(). + - + name: vlan-tag + doc: + Device is capable of exposing receive packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). - type: flags name: xsk-flags diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index e3e9420fd817..a6e0ece18be5 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -20,7 +20,13 @@ Currently, the following kfuncs are supported. In the future, as more metadata is supported, this set will grow: .. kernel-doc:: net/core/xdp.c - :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash + :identifiers: bpf_xdp_metadata_rx_timestamp + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_hash + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_vlan_tag An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other diff --git a/include/net/xdp.h b/include/net/xdp.h index b7d6fe61381f..8cd04a74dba5 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -404,6 +404,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, NETDEV_XDP_RX_METADATA_HASH, \ bpf_xdp_metadata_rx_hash, \ xmo_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + NETDEV_XDP_RX_METADATA_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag, \ + xmo_rx_vlan_tag) \ enum xdp_rx_metadata { #define XDP_METADATA_KFUNC(name, _, __, ___) name, @@ -465,6 +469,8 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, __be16 *vlan_proto, + u16 *vlan_tci); }; #ifdef CONFIG_NET diff --git a/include/uapi/linux/netdev.h b/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/include/uapi/linux/netdev.h +++ b/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/net/core/xdp.c b/net/core/xdp.c index b6f1d6dab3f2..4869c1c2d8f3 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -736,6 +736,39 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } +/** + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag + * @ctx: XDP context pointer. + * @vlan_proto: Destination pointer for VLAN Tag protocol identifier (TPID). + * @vlan_tci: Destination pointer for VLAN TCI (VID + DEI + PCP) + * + * In case of success, ``vlan_proto`` contains *Tag protocol identifier (TPID)*, + * usually ``ETH_P_8021Q`` or ``ETH_P_8021AD``, but some networks can use + * custom TPIDs. ``vlan_proto`` is stored in **network byte order (BE)** + * and should be used as follows: + * ``if (vlan_proto == bpf_htons(ETH_P_8021Q)) do_something();`` + * + * ``vlan_tci`` contains the remaining 16 bits of a VLAN tag. + * Driver is expected to provide those in **host byte order (usually LE)**, + * so the bpf program should not perform byte conversion. + * According to 802.1Q standard, *VLAN TCI (Tag control information)* + * is a bit field that contains: + * *VLAN identifier (VID)* that can be read with ``vlan_tci & 0xfff``, + * *Drop eligible indicator (DEI)* - 1 bit, + * *Priority code point (PCP)* - 3 bits. + * For detailed meaning of DEI and PCP, please refer to other sources. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : VLAN tag was not stripped or is not available + */ +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, u16 *vlan_tci) +{ + return -EOPNOTSUPP; +} + __bpf_kfunc_end_defs(); BTF_SET8_START(xdp_metadata_kfunc_ids) diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 6244c0164976..966638b08ccf 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/tools/net/ynl/generated/netdev-user.c b/tools/net/ynl/generated/netdev-user.c index 3b9dee94d4ce..e3fe748086bd 100644 --- a/tools/net/ynl/generated/netdev-user.c +++ b/tools/net/ynl/generated/netdev-user.c @@ -53,6 +53,7 @@ const char *netdev_xdp_act_str(enum netdev_xdp_act value) static const char * const netdev_xdp_rx_metadata_strmap[] = { [0] = "timestamp", [1] = "hash", + [2] = "vlan-tag", }; const char *netdev_xdp_rx_metadata_str(enum netdev_xdp_rx_metadata value) -- cgit v1.3.1 From 173b6d1cdf582e7438b3ab4ef2f40e6833579490 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Dec 2023 11:06:08 +0000 Subject: docs: networking: timestamping: mention MSG_EOR flag TCP got MSG_EOR support in linux-4.7. This is a canonical way of making sure no coalescing will be performed on the skb, even if it could not be immediately sent. Signed-off-by: Eric Dumazet Cc: Martin KaFai Lau Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20231212110608.3673677-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/timestamping.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst index f17c01834a12..5e93cd71f99f 100644 --- a/Documentation/networking/timestamping.rst +++ b/Documentation/networking/timestamping.rst @@ -357,7 +357,8 @@ enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at send time with the value returned for each timestamp. It can prevent the situation by always flushing the TCP stack in between requests, for instance by enabling TCP_NODELAY and disabling TCP_CORK and -autocork. +autocork. After linux-4.7, a better way to prevent coalescing is +to use MSG_EOR flag at sendmsg() time. These precautions ensure that the timestamp is generated only when all bytes have passed a timestamp point, assuming that the network stack -- cgit v1.3.1 From 13e59344fb9d3c9d3acd138ae320b5b67b658694 Mon Sep 17 00:00:00 2001 From: Ahmed Zaki Date: Tue, 12 Dec 2023 17:33:16 -0700 Subject: net: ethtool: add support for symmetric-xor RSS hash Symmetric RSS hash functions are beneficial in applications that monitor both Tx and Rx packets of the same flow (IDS, software firewalls, ..etc). Getting all traffic of the same flow on the same RX queue results in higher CPU cache efficiency. A NIC that supports "symmetric-xor" can achieve this RSS hash symmetry by XORing the source and destination fields and pass the values to the RSS hash algorithm. The user may request RSS hash symmetry for a specific algorithm, via: # ethtool -X eth0 hfunc symmetric-xor or turn symmetry off (asymmetric) by: # ethtool -X eth0 hfunc The specific fields for each flow type should then be specified as usual via: # ethtool -N|-U eth0 rx-flow-hash s|d|f|n Reviewed-by: Wojciech Drewek Signed-off-by: Ahmed Zaki Link: https://lore.kernel.org/r/20231213003321.605376-4-ahmed.zaki@intel.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 4 ++++ Documentation/networking/ethtool-netlink.rst | 6 +++++- Documentation/networking/scaling.rst | 15 ++++++++++++++ include/linux/ethtool.h | 6 ++++++ include/uapi/linux/ethtool.h | 13 +++++++++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/ioctl.c | 30 ++++++++++++++++++++++++---- net/ethtool/rss.c | 5 +++++ 8 files changed, 74 insertions(+), 6 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 5c7a65b009b4..197208f419dc 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -908,6 +908,9 @@ attribute-sets: - name: hkey type: binary + - + name: input_xfrm + type: u32 - name: plca attributes: @@ -1598,6 +1601,7 @@ operations: - hfunc - indir - hkey + - input_xfrm dump: *rss-get-op - name: plca-get-cfg diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 6a49624a9cbf..d583d9abf2f8 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1774,12 +1774,16 @@ Kernel response contents: ``ETHTOOL_A_RSS_HFUNC`` u32 RSS hash func ``ETHTOOL_A_RSS_INDIR`` binary Indir table bytes ``ETHTOOL_A_RSS_HKEY`` binary Hash key bytes + ``ETHTOOL_A_RSS_INPUT_XFRM`` u32 RSS input data transformation ===================================== ====== ========================== ETHTOOL_A_RSS_HFUNC attribute is bitmap indicating the hash function being used. Current supported options are toeplitz, xor or crc32. -ETHTOOL_A_RSS_INDIR attribute returns RSS indrection table where each byte +ETHTOOL_A_RSS_INDIR attribute returns RSS indirection table where each byte indicates queue number. +ETHTOOL_A_RSS_INPUT_XFRM attribute is a bitmap indicating the type of +transformation applied to the input protocol fields before given to the RSS +hfunc. Current supported option is symmetric-xor. PLCA_GET_CFG ============ diff --git a/Documentation/networking/scaling.rst b/Documentation/networking/scaling.rst index 03ae19a689fc..4eb50bcb9d42 100644 --- a/Documentation/networking/scaling.rst +++ b/Documentation/networking/scaling.rst @@ -44,6 +44,21 @@ by masking out the low order seven bits of the computed hash for the packet (usually a Toeplitz hash), taking this number as a key into the indirection table and reading the corresponding value. +Some NICs support symmetric RSS hashing where, if the IP (source address, +destination address) and TCP/UDP (source port, destination port) tuples +are swapped, the computed hash is the same. This is beneficial in some +applications that monitor TCP/IP flows (IDS, firewalls, ...etc) and need +both directions of the flow to land on the same Rx queue (and CPU). The +"Symmetric-XOR" is a type of RSS algorithms that achieves this hash +symmetry by XORing the input source and destination fields of the IP +and/or L4 protocols. This, however, results in reduced input entropy and +could potentially be exploited. Specifically, the algorithm XORs the input +as follows:: + + # (SRC_IP ^ DST_IP, SRC_IP ^ DST_IP, SRC_PORT ^ DST_PORT, SRC_PORT ^ DST_PORT) + +The result is then fed to the underlying RSS algorithm. + Some advanced NICs allow steering packets to queues based on programmable filters. For example, webserver bound TCP port 80 packets can be directed to their own receive queue. Such “n-tuple” filters can diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 66fe254c3e51..cfcd952a1d4f 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -615,6 +615,8 @@ struct ethtool_mm_stats { * to allocate a new RSS context; on return this field will * contain the ID of the newly allocated context. * @rss_delete: Set to non-ZERO to remove the @rss_context context. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. */ struct ethtool_rxfh_param { u8 hfunc; @@ -624,6 +626,7 @@ struct ethtool_rxfh_param { u8 *key; u32 rss_context; u8 rss_delete; + u8 input_xfrm; }; /** @@ -632,6 +635,8 @@ struct ethtool_rxfh_param { * parameter. * @cap_rss_ctx_supported: indicates if the driver supports RSS * contexts. + * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor + * RSS. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -811,6 +816,7 @@ struct ethtool_rxfh_param { struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; + u32 cap_rss_sym_xor_supported:1; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index f7fba0dc87e5..0787d561ace0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1266,6 +1266,8 @@ struct ethtool_rxfh_indir { * hardware hash key. * @hfunc: Defines the current RSS hash function used by HW (or to be set to). * Valid values are one of the %ETH_RSS_HASH_*. + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. * @rsvd8: Reserved for future use; see the note on reserved space. * @rsvd32: Reserved for future use; see the note on reserved space. * @rss_config: RX ring/queue index for each hash value i.e., indirection table @@ -1285,7 +1287,8 @@ struct ethtool_rxfh { __u32 indir_size; __u32 key_size; __u8 hfunc; - __u8 rsvd8[3]; + __u8 input_xfrm; + __u8 rsvd8[2]; __u32 rsvd32; __u32 rss_config[]; }; @@ -1992,6 +1995,14 @@ static inline int ethtool_validate_duplex(__u8 duplex) #define WOL_MODE_COUNT 8 +/* RSS hash function data + * XOR the corresponding source and destination fields of each specified + * protocol. Both copies of the XOR'ed fields are fed into the RSS and RXHASH + * calculation. Note that this XORing reduces the input set entropy and could + * be exploited to reduce the RSS queue spread. + */ +#define RXH_XFRM_SYM_XOR (1 << 0) + /* L2-L4 network traffic flow types */ #define TCP_V4_FLOW 0x01 /* hash or spec (tcp_ip4_spec) */ #define UDP_V4_FLOW 0x02 /* hash or spec (udp_ip4_spec) */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 73e2c10dc2cc..3f89074aa06c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -908,6 +908,7 @@ enum { ETHTOOL_A_RSS_HFUNC, /* u32 */ ETHTOOL_A_RSS_INDIR, /* binary */ ETHTOOL_A_RSS_HKEY, /* binary */ + ETHTOOL_A_RSS_INPUT_XFRM, /* u32 */ __ETHTOOL_A_RSS_CNT, ETHTOOL_A_RSS_MAX = (__ETHTOOL_A_RSS_CNT - 1), diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 86e5fc64b711..86d47425038b 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -972,18 +972,35 @@ static int ethtool_rxnfc_copy_to_user(void __user *useraddr, static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, u32 cmd, void __user *useraddr) { + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_rxfh_param rxfh = {}; struct ethtool_rxnfc info; size_t info_size = sizeof(info); int rc; - if (!dev->ethtool_ops->set_rxnfc) + if (!ops->set_rxnfc || !ops->get_rxfh) return -EOPNOTSUPP; rc = ethtool_rxnfc_copy_struct(cmd, &info, &info_size, useraddr); if (rc) return rc; - rc = dev->ethtool_ops->set_rxnfc(dev, &info); + rc = ops->get_rxfh(dev, &rxfh); + if (rc) + return rc; + + /* Sanity check: if symmetric-xor is set, then: + * 1 - no other fields besides IP src/dst and/or L4 src/dst + * 2 - If src is set, dst must also be set + */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + ((info.data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) || + (!!(info.data & RXH_IP_SRC) ^ !!(info.data & RXH_IP_DST)) || + (!!(info.data & RXH_L4_B_0_1) ^ !!(info.data & RXH_L4_B_2_3)))) + return -EINVAL; + + rc = ops->set_rxnfc(dev, &info); if (rc) return rc; @@ -1198,7 +1215,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, user_key_size = rxfh.key_size; /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ if (rxfh.rss_context && !ops->cap_rss_ctx_supported) @@ -1271,11 +1288,15 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, return -EFAULT; /* Check that reserved fields are 0 for now */ - if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd8[2] || rxfh.rsvd32) + if (rxfh.rsvd8[0] || rxfh.rsvd8[1] || rxfh.rsvd32) return -EINVAL; /* Most drivers don't handle rss_context, check it's 0 as well */ if (rxfh.rss_context && !ops->cap_rss_ctx_supported) return -EOPNOTSUPP; + /* Check input data transformation capabilities */ + if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && + !ops->cap_rss_sym_xor_supported) + return -EOPNOTSUPP; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key or function. @@ -1341,6 +1362,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; + rxfh_dev.input_xfrm = rxfh.input_xfrm; ret = ops->set_rxfh(dev, &rxfh_dev, extack); if (ret) diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index efc9f4409e40..71679137eff2 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -13,6 +13,7 @@ struct rss_reply_data { u32 indir_size; u32 hkey_size; u32 hfunc; + u32 input_xfrm; u32 *indir_table; u8 *hkey; }; @@ -97,6 +98,7 @@ rss_prepare_data(const struct ethnl_req_info *req_base, goto out_ops; data->hfunc = rxfh.hfunc; + data->input_xfrm = rxfh.input_xfrm; out_ops: ethnl_ops_complete(dev); return ret; @@ -110,6 +112,7 @@ rss_reply_size(const struct ethnl_req_info *req_base, int len; len = nla_total_size(sizeof(u32)) + /* _RSS_HFUNC */ + nla_total_size(sizeof(u32)) + /* _RSS_INPUT_XFRM */ nla_total_size(sizeof(u32) * data->indir_size) + /* _RSS_INDIR */ nla_total_size(data->hkey_size); /* _RSS_HKEY */ @@ -124,6 +127,8 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base, if ((data->hfunc && nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) || + (data->input_xfrm && + nla_put_u32(skb, ETHTOOL_A_RSS_INPUT_XFRM, data->input_xfrm)) || (data->indir_size && nla_put(skb, ETHTOOL_A_RSS_INDIR, sizeof(u32) * data->indir_size, data->indir_table)) || -- cgit v1.3.1 From d96f04e05f2634b2dea3cdfc9651f5704d829292 Mon Sep 17 00:00:00 2001 From: Paul M Stillwell Jr Date: Tue, 12 Dec 2023 21:07:15 -0800 Subject: ice: add documentation for FW logging Add documentation for FW logging in Documentation/networking/device_drivers/ethernet/intel/ice.rst Signed-off-by: Paul M Stillwell Jr Signed-off-by: Tony Nguyen --- .../device_drivers/ethernet/intel/ice.rst | 141 +++++++++++++++++++++ 1 file changed, 141 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/intel/ice.rst b/Documentation/networking/device_drivers/ethernet/intel/ice.rst index e4d065c55ea8..5038e54586af 100644 --- a/Documentation/networking/device_drivers/ethernet/intel/ice.rst +++ b/Documentation/networking/device_drivers/ethernet/intel/ice.rst @@ -895,6 +895,147 @@ driver writes raw bytes by the GNSS object to the receiver through i2c. Please refer to the hardware GNSS module documentation for configuration details. +Firmware (FW) logging +--------------------- +The driver supports FW logging via the debugfs interface on PF 0 only. The FW +running on the NIC must support FW logging; if the FW doesn't support FW logging +the 'fwlog' file will not get created in the ice debugfs directory. + +Module configuration +~~~~~~~~~~~~~~~~~~~~ +Firmware logging is configured on a per module basis. Each module can be set to +a value independent of the other modules (unless the module 'all' is specified). +The modules will be instantiated under the 'fwlog/modules' directory. + +The user can set the log level for a module by writing to the module file like +this:: + + # echo > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/ + +where + +* log_level is a name as described below. Each level includes the + messages from the previous/lower level + + * none + * error + * warning + * normal + * verbose + +* module is a name that represents the module to receive events for. The + module names are + + * general + * ctrl + * link + * link_topo + * dnl + * i2c + * sdp + * mdio + * adminq + * hdma + * lldp + * dcbx + * dcb + * xlr + * nvm + * auth + * vpd + * iosf + * parser + * sw + * scheduler + * txq + * rsvd + * post + * watchdog + * task_dispatch + * mng + * synce + * health + * tsdrv + * pfreg + * mdlver + * all + +The name 'all' is special and allows the user to set all of the modules to the +specified log_level or to read the log_level of all of the modules. + +Example usage to configure the modules +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To set a single module to 'verbose':: + + # echo verbose > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/link + +To set multiple modules then issue the command multiple times:: + + # echo verbose > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/link + # echo warning > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/ctrl + # echo none > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/dcb + +To set all the modules to the same value:: + + # echo normal > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/all + +To read the log_level of a specific module (e.g. module 'general'):: + + # cat /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/general + +To read the log_level of all the modules:: + + # cat /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/modules/all + +Enabling FW log +~~~~~~~~~~~~~~~ +Configuring the modules indicates to the FW that the configured modules should +generate events that the driver is interested in, but it **does not** send the +events to the driver until the enable message is sent to the FW. To do this +the user can write a 1 (enable) or 0 (disable) to 'fwlog/enable'. An example +is:: + + # echo 1 > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/enable + +Retrieving FW log data +~~~~~~~~~~~~~~~~~~~~~~ +The FW log data can be retrieved by reading from 'fwlog/data'. The user can +write any value to 'fwlog/data' to clear the data. The data can only be cleared +when FW logging is disabled. The FW log data is a binary file that is sent to +Intel and used to help debug user issues. + +An example to read the data is:: + + # cat /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/data > fwlog.bin + +An example to clear the data is:: + + # echo 0 > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/data + +Changing how often the log events are sent to the driver +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver receives FW log data from the Admin Receive Queue (ARQ). The +frequency that the FW sends the ARQ events can be configured by writing to +'fwlog/nr_messages'. The range is 1-128 (1 means push every log message, 128 +means push only when the max AQ command buffer is full). The suggested value is +10. The user can see what the value is configured to by reading +'fwlog/nr_messages'. An example to set the value is:: + + # echo 50 > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/nr_messages + +Configuring the amount of memory used to store FW log data +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver stores FW log data within the driver. The default size of the memory +used to store the data is 1MB. Some use cases may require more or less data so +the user can change the amount of memory that is allocated for FW log data. +To change the amount of memory then write to 'fwlog/log_size'. The value must be +one of: 128K, 256K, 512K, 1M, or 2M. FW logging must be disabled to change the +value. An example of changing the value is:: + + # echo 128K > /sys/kernel/debug/ice/0000\:18\:00.0/fwlog/log_size + + Performance Optimization ======================== Driver defaults are meant to fit a wide variety of workloads, but if further -- cgit v1.3.1 From 4944566706b27918ca15eda913889db296792415 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Dec 2023 10:48:59 +0000 Subject: net: increase optmem_max default value For many years, /proc/sys/net/core/optmem_max default value on a 64bit kernel has been 20 KB. Regular usage of TCP tx zerocopy needs a bit more. Google has used 128KB as the default value for 7 years without any problem. Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Reviewed-by: Willem de Bruijn Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- Documentation/admin-guide/sysctl/net.rst | 5 ++++- net/core/sock.c | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst index c7525942f12c..396091651955 100644 --- a/Documentation/admin-guide/sysctl/net.rst +++ b/Documentation/admin-guide/sysctl/net.rst @@ -345,7 +345,10 @@ optmem_max ---------- Maximum ancillary buffer size allowed per socket. Ancillary data is a sequence -of struct cmsghdr structures with appended data. +of struct cmsghdr structures with appended data. TCP tx zerocopy also uses +optmem_max as a limit for its internal structures. + +Default : 128 KB fb_tunnels_only_for_init_net ---------------------------- diff --git a/net/core/sock.c b/net/core/sock.c index fef349dd72fa..08ecdc68d2df 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -283,8 +283,10 @@ EXPORT_SYMBOL(sysctl_rmem_max); __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; -/* Maximal space eaten by iovec or ancillary data plus some space */ -int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); +/* Limits per socket sk_omem_alloc usage. + * TCP zerocopy regular usage needs 128 KB. + */ +int sysctl_optmem_max __read_mostly = 128 * 1024; EXPORT_SYMBOL(sysctl_optmem_max); int sysctl_tstamp_allow_data __read_mostly = 1; -- cgit v1.3.1 From 3ada0b33c454e2c8e3909b6d90576e993ac52d78 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Dec 2023 17:57:33 -0800 Subject: netlink: specs: ovs: remove fixed header fields from attrs Op's "attributes" list is a workaround for families with a single attr set. We don't want to render a single huge request structure, the same for each op since we know that most ops accept only a small set of attributes. "Attributes" list lets us narrow down the attributes to what op acctually pays attention to. It doesn't make sense to put names of fixed headers in there. They are not "attributes" and we can't really narrow down the struct members. Remove the fixed header fields from attrs for ovs families in preparation for C codegen support. Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/specs/ovs_datapath.yaml | 2 -- Documentation/netlink/specs/ovs_flow.yaml | 3 --- Documentation/netlink/specs/ovs_vport.yaml | 4 ---- 3 files changed, 9 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ovs_datapath.yaml b/Documentation/netlink/specs/ovs_datapath.yaml index f709c26c3e92..067c54a52d7a 100644 --- a/Documentation/netlink/specs/ovs_datapath.yaml +++ b/Documentation/netlink/specs/ovs_datapath.yaml @@ -142,7 +142,6 @@ operations: do: request: attributes: - - dp-ifindex - name - upcall-pid - user-features @@ -154,7 +153,6 @@ operations: do: request: attributes: - - dp-ifindex - name mcast-groups: diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 109ca1f57b6c..29315f3538fd 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -947,13 +947,11 @@ operations: do: &flow-get-op request: attributes: - - dp-ifindex - key - ufid - ufid-flags reply: attributes: - - dp-ifindex - key - ufid - mask @@ -968,7 +966,6 @@ operations: do: request: attributes: - - dp-ifindex - key - ufid - mask diff --git a/Documentation/netlink/specs/ovs_vport.yaml b/Documentation/netlink/specs/ovs_vport.yaml index f65ce62cd60d..86ba9ac2a521 100644 --- a/Documentation/netlink/specs/ovs_vport.yaml +++ b/Documentation/netlink/specs/ovs_vport.yaml @@ -135,7 +135,6 @@ operations: - name - type - upcall-pid - - dp-ifindex - ifindex - options - @@ -146,7 +145,6 @@ operations: do: request: attributes: - - dp-ifindex - port-no - type - name @@ -158,11 +156,9 @@ operations: do: &vport-get-op request: attributes: - - dp-ifindex - name reply: &dev-all attributes: - - dp-ifindex - port-no - type - name -- cgit v1.3.1 From 209bcb9af8f166ed6ed81dd550d41b8b64b2ac09 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Dec 2023 17:57:34 -0800 Subject: netlink: specs: ovs: correct enum names in specs Align the enum-names of OVS with what's actually in the uAPI. Either correct the names, or mark the enum as empty because the values are in fact #defines. Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/specs/ovs_datapath.yaml | 1 + Documentation/netlink/specs/ovs_flow.yaml | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ovs_datapath.yaml b/Documentation/netlink/specs/ovs_datapath.yaml index 067c54a52d7a..edc8c95ca6f5 100644 --- a/Documentation/netlink/specs/ovs_datapath.yaml +++ b/Documentation/netlink/specs/ovs_datapath.yaml @@ -20,6 +20,7 @@ definitions: name: user-features type: flags name-prefix: ovs-dp-f- + enum-name: entries: - name: unaligned diff --git a/Documentation/netlink/specs/ovs_flow.yaml b/Documentation/netlink/specs/ovs_flow.yaml index 29315f3538fd..4fdfc6b5cae9 100644 --- a/Documentation/netlink/specs/ovs_flow.yaml +++ b/Documentation/netlink/specs/ovs_flow.yaml @@ -124,6 +124,7 @@ definitions: - name: ovs-frag-type name-prefix: ovs-frag-type- + enum-name: ovs-frag-type type: enum entries: - @@ -269,6 +270,7 @@ definitions: - name: ovs-ufid-flags name-prefix: ovs-ufid-f- + enum-name: type: flags entries: - omit-key @@ -288,6 +290,7 @@ definitions: doc: Basis used for computing hash. - name: ovs-hash-alg + enum-name: ovs-hash-alg type: enum doc: | Data path hash algorithm for computing Datapath hash. The algorithm type only specifies @@ -339,6 +342,7 @@ definitions: MPLS tunnel attributes. - name: ct-state-flags + enum-name: type: flags name-prefix: ovs-cs-f- entries: -- cgit v1.3.1 From b059aef76c519226730dd18777c0e15dad4fae21 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Dec 2023 17:57:35 -0800 Subject: netlink: specs: mptcp: rename the MPTCP path management spec We assume in handful of places that the name of the spec is the same as the name of the family. We could fix that but it seems like a fair assumption to make. Rename the MPTCP spec instead. Reviewed-by: Mat Martineau Reviewed-by: Donald Hunter Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/netlink/specs/mptcp.yaml | 393 ------------------------------ Documentation/netlink/specs/mptcp_pm.yaml | 393 ++++++++++++++++++++++++++++++ MAINTAINERS | 2 +- include/uapi/linux/mptcp_pm.h | 2 +- net/mptcp/mptcp_pm_gen.c | 2 +- net/mptcp/mptcp_pm_gen.h | 2 +- 6 files changed, 397 insertions(+), 397 deletions(-) delete mode 100644 Documentation/netlink/specs/mptcp.yaml create mode 100644 Documentation/netlink/specs/mptcp_pm.yaml (limited to 'Documentation') diff --git a/Documentation/netlink/specs/mptcp.yaml b/Documentation/netlink/specs/mptcp.yaml deleted file mode 100644 index 49f90cfb4698..000000000000 --- a/Documentation/netlink/specs/mptcp.yaml +++ /dev/null @@ -1,393 +0,0 @@ -# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) - -name: mptcp_pm -protocol: genetlink-legacy -doc: Multipath TCP. - -c-family-name: mptcp-pm-name -c-version-name: mptcp-pm-ver -max-by-define: true -kernel-policy: per-op -cmd-cnt-name: --mptcp-pm-cmd-after-last - -definitions: - - - type: enum - name: event-type - enum-name: mptcp-event-type - name-prefix: mptcp-event- - entries: - - - name: unspec - doc: unused event - - - name: created - doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport - A new MPTCP connection has been created. It is the good time to - allocate memory and send ADD_ADDR if needed. Depending on the - traffic-patterns it can take a long time until the - MPTCP_EVENT_ESTABLISHED is sent. - - - name: established - doc: - token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport - A MPTCP connection is established (can start new subflows). - - - name: closed - doc: - token - A MPTCP connection has stopped. - - - name: announced - value: 6 - doc: - token, rem_id, family, daddr4 | daddr6 [, dport] - A new address has been announced by the peer. - - - name: removed - doc: - token, rem_id - An address has been lost by the peer. - - - name: sub-established - value: 10 - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] - A new subflow has been established. 'error' should not be set. - - - name: sub-closed - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] - A subflow has been closed. An error (copy of sk_err) could be set if an - error has been detected for this subflow. - - - name: sub-priority - value: 13 - doc: - token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, - dport, backup, if_idx [, error] - The priority of a subflow has changed. 'error' should not be set. - - - name: listener-created - value: 15 - doc: - family, sport, saddr4 | saddr6 - A new PM listener is created. - - - name: listener-closed - doc: - family, sport, saddr4 | saddr6 - A PM listener is closed. - -attribute-sets: - - - name: address - name-prefix: mptcp-pm-addr-attr- - attributes: - - - name: unspec - type: unused - value: 0 - - - name: family - type: u16 - - - name: id - type: u8 - - - name: addr4 - type: u32 - byte-order: big-endian - - - name: addr6 - type: binary - checks: - exact-len: 16 - - - name: port - type: u16 - byte-order: big-endian - - - name: flags - type: u32 - - - name: if-idx - type: s32 - - - name: subflow-attribute - name-prefix: mptcp-subflow-attr- - attributes: - - - name: unspec - type: unused - value: 0 - - - name: token-rem - type: u32 - - - name: token-loc - type: u32 - - - name: relwrite-seq - type: u32 - - - name: map-seq - type: u64 - - - name: map-sfseq - type: u32 - - - name: ssn-offset - type: u32 - - - name: map-datalen - type: u16 - - - name: flags - type: u32 - - - name: id-rem - type: u8 - - - name: id-loc - type: u8 - - - name: pad - type: pad - - - name: endpoint - name-prefix: mptcp-pm-endpoint- - attributes: - - - name: addr - type: nest - nested-attributes: address - - - name: attr - name-prefix: mptcp-pm-attr- - attr-cnt-name: --mptcp-attr-after-last - attributes: - - - name: unspec - type: unused - value: 0 - - - name: addr - type: nest - nested-attributes: address - - - name: rcv-add-addrs - type: u32 - - - name: subflows - type: u32 - - - name: token - type: u32 - - - name: loc-id - type: u8 - - - name: addr-remote - type: nest - nested-attributes: address - - - name: event-attr - enum-name: mptcp-event-attr - name-prefix: mptcp-attr- - attributes: - - - name: unspec - type: unused - value: 0 - - - name: token - type: u32 - - - name: family - type: u16 - - - name: loc-id - type: u8 - - - name: rem-id - type: u8 - - - name: saddr4 - type: u32 - byte-order: big-endian - - - name: saddr6 - type: binary - checks: - min-len: 16 - - - name: daddr4 - type: u32 - byte-order: big-endian - - - name: daddr6 - type: binary - checks: - min-len: 16 - - - name: sport - type: u16 - byte-order: big-endian - - - name: dport - type: u16 - byte-order: big-endian - - - name: backup - type: u8 - - - name: error - type: u8 - - - name: flags - type: u16 - - - name: timeout - type: u32 - - - name: if_idx - type: u32 - - - name: reset-reason - type: u32 - - - name: reset-flags - type: u32 - - - name: server-side - type: u8 - -operations: - list: - - - name: unspec - doc: unused - value: 0 - - - name: add-addr - doc: Add endpoint - attribute-set: endpoint - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &add-addr-attrs - request: - attributes: - - addr - - - name: del-addr - doc: Delete endpoint - attribute-set: endpoint - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: *add-addr-attrs - - - name: get-addr - doc: Get endpoint information - attribute-set: endpoint - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &get-addr-attrs - request: - attributes: - - addr - reply: - attributes: - - addr - dump: - reply: - attributes: - - addr - - - name: flush-addrs - doc: flush addresses - attribute-set: endpoint - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: *add-addr-attrs - - - name: set-limits - doc: Set protocol limits - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &mptcp-limits - request: - attributes: - - rcv-add-addrs - - subflows - - - name: get-limits - doc: Get protocol limits - attribute-set: attr - dont-validate: [ strict ] - do: &mptcp-get-limits - request: - attributes: - - rcv-add-addrs - - subflows - reply: - attributes: - - rcv-add-addrs - - subflows - - - name: set-flags - doc: Change endpoint flags - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &mptcp-set-flags - request: - attributes: - - addr - - token - - addr-remote - - - name: announce - doc: announce new sf - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &announce-add - request: - attributes: - - addr - - token - - - name: remove - doc: announce removal - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: - request: - attributes: - - token - - loc-id - - - name: subflow-create - doc: todo - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: &sf-create - request: - attributes: - - addr - - token - - addr-remote - - - name: subflow-destroy - doc: todo - attribute-set: attr - dont-validate: [ strict ] - flags: [ uns-admin-perm ] - do: *sf-create diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml new file mode 100644 index 000000000000..49f90cfb4698 --- /dev/null +++ b/Documentation/netlink/specs/mptcp_pm.yaml @@ -0,0 +1,393 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: mptcp_pm +protocol: genetlink-legacy +doc: Multipath TCP. + +c-family-name: mptcp-pm-name +c-version-name: mptcp-pm-ver +max-by-define: true +kernel-policy: per-op +cmd-cnt-name: --mptcp-pm-cmd-after-last + +definitions: + - + type: enum + name: event-type + enum-name: mptcp-event-type + name-prefix: mptcp-event- + entries: + - + name: unspec + doc: unused event + - + name: created + doc: + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + A new MPTCP connection has been created. It is the good time to + allocate memory and send ADD_ADDR if needed. Depending on the + traffic-patterns it can take a long time until the + MPTCP_EVENT_ESTABLISHED is sent. + - + name: established + doc: + token, family, saddr4 | saddr6, daddr4 | daddr6, sport, dport + A MPTCP connection is established (can start new subflows). + - + name: closed + doc: + token + A MPTCP connection has stopped. + - + name: announced + value: 6 + doc: + token, rem_id, family, daddr4 | daddr6 [, dport] + A new address has been announced by the peer. + - + name: removed + doc: + token, rem_id + An address has been lost by the peer. + - + name: sub-established + value: 10 + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + A new subflow has been established. 'error' should not be set. + - + name: sub-closed + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + A subflow has been closed. An error (copy of sk_err) could be set if an + error has been detected for this subflow. + - + name: sub-priority + value: 13 + doc: + token, family, loc_id, rem_id, saddr4 | saddr6, daddr4 | daddr6, sport, + dport, backup, if_idx [, error] + The priority of a subflow has changed. 'error' should not be set. + - + name: listener-created + value: 15 + doc: + family, sport, saddr4 | saddr6 + A new PM listener is created. + - + name: listener-closed + doc: + family, sport, saddr4 | saddr6 + A PM listener is closed. + +attribute-sets: + - + name: address + name-prefix: mptcp-pm-addr-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: family + type: u16 + - + name: id + type: u8 + - + name: addr4 + type: u32 + byte-order: big-endian + - + name: addr6 + type: binary + checks: + exact-len: 16 + - + name: port + type: u16 + byte-order: big-endian + - + name: flags + type: u32 + - + name: if-idx + type: s32 + - + name: subflow-attribute + name-prefix: mptcp-subflow-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: token-rem + type: u32 + - + name: token-loc + type: u32 + - + name: relwrite-seq + type: u32 + - + name: map-seq + type: u64 + - + name: map-sfseq + type: u32 + - + name: ssn-offset + type: u32 + - + name: map-datalen + type: u16 + - + name: flags + type: u32 + - + name: id-rem + type: u8 + - + name: id-loc + type: u8 + - + name: pad + type: pad + - + name: endpoint + name-prefix: mptcp-pm-endpoint- + attributes: + - + name: addr + type: nest + nested-attributes: address + - + name: attr + name-prefix: mptcp-pm-attr- + attr-cnt-name: --mptcp-attr-after-last + attributes: + - + name: unspec + type: unused + value: 0 + - + name: addr + type: nest + nested-attributes: address + - + name: rcv-add-addrs + type: u32 + - + name: subflows + type: u32 + - + name: token + type: u32 + - + name: loc-id + type: u8 + - + name: addr-remote + type: nest + nested-attributes: address + - + name: event-attr + enum-name: mptcp-event-attr + name-prefix: mptcp-attr- + attributes: + - + name: unspec + type: unused + value: 0 + - + name: token + type: u32 + - + name: family + type: u16 + - + name: loc-id + type: u8 + - + name: rem-id + type: u8 + - + name: saddr4 + type: u32 + byte-order: big-endian + - + name: saddr6 + type: binary + checks: + min-len: 16 + - + name: daddr4 + type: u32 + byte-order: big-endian + - + name: daddr6 + type: binary + checks: + min-len: 16 + - + name: sport + type: u16 + byte-order: big-endian + - + name: dport + type: u16 + byte-order: big-endian + - + name: backup + type: u8 + - + name: error + type: u8 + - + name: flags + type: u16 + - + name: timeout + type: u32 + - + name: if_idx + type: u32 + - + name: reset-reason + type: u32 + - + name: reset-flags + type: u32 + - + name: server-side + type: u8 + +operations: + list: + - + name: unspec + doc: unused + value: 0 + - + name: add-addr + doc: Add endpoint + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &add-addr-attrs + request: + attributes: + - addr + - + name: del-addr + doc: Delete endpoint + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *add-addr-attrs + - + name: get-addr + doc: Get endpoint information + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &get-addr-attrs + request: + attributes: + - addr + reply: + attributes: + - addr + dump: + reply: + attributes: + - addr + - + name: flush-addrs + doc: flush addresses + attribute-set: endpoint + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *add-addr-attrs + - + name: set-limits + doc: Set protocol limits + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &mptcp-limits + request: + attributes: + - rcv-add-addrs + - subflows + - + name: get-limits + doc: Get protocol limits + attribute-set: attr + dont-validate: [ strict ] + do: &mptcp-get-limits + request: + attributes: + - rcv-add-addrs + - subflows + reply: + attributes: + - rcv-add-addrs + - subflows + - + name: set-flags + doc: Change endpoint flags + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &mptcp-set-flags + request: + attributes: + - addr + - token + - addr-remote + - + name: announce + doc: announce new sf + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &announce-add + request: + attributes: + - addr + - token + - + name: remove + doc: announce removal + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: + request: + attributes: + - token + - loc-id + - + name: subflow-create + doc: todo + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: &sf-create + request: + attributes: + - addr + - token + - addr-remote + - + name: subflow-destroy + doc: todo + attribute-set: attr + dont-validate: [ strict ] + flags: [ uns-admin-perm ] + do: *sf-create diff --git a/MAINTAINERS b/MAINTAINERS index daf440129535..dda78b4ce707 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -15099,7 +15099,7 @@ W: https://github.com/multipath-tcp/mptcp_net-next/wiki B: https://github.com/multipath-tcp/mptcp_net-next/issues T: git https://github.com/multipath-tcp/mptcp_net-next.git export-net T: git https://github.com/multipath-tcp/mptcp_net-next.git export -F: Documentation/netlink/specs/mptcp.yaml +F: Documentation/netlink/specs/mptcp_pm.yaml F: Documentation/networking/mptcp-sysctl.rst F: include/net/mptcp.h F: include/trace/events/mptcp.h diff --git a/include/uapi/linux/mptcp_pm.h b/include/uapi/linux/mptcp_pm.h index b5d11aece408..50589e5dd6a3 100644 --- a/include/uapi/linux/mptcp_pm.h +++ b/include/uapi/linux/mptcp_pm.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN uapi header */ #ifndef _UAPI_LINUX_MPTCP_PM_H diff --git a/net/mptcp/mptcp_pm_gen.c b/net/mptcp/mptcp_pm_gen.c index a2325e70ddab..670da7822e6c 100644 --- a/net/mptcp/mptcp_pm_gen.c +++ b/net/mptcp/mptcp_pm_gen.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel source */ #include diff --git a/net/mptcp/mptcp_pm_gen.h b/net/mptcp/mptcp_pm_gen.h index 10579d184587..ac9fc7225b6a 100644 --- a/net/mptcp/mptcp_pm_gen.h +++ b/net/mptcp/mptcp_pm_gen.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */ /* Do not edit directly, auto-generated from: */ -/* Documentation/netlink/specs/mptcp.yaml */ +/* Documentation/netlink/specs/mptcp_pm.yaml */ /* YNL-GEN kernel header */ #ifndef _LINUX_MPTCP_PM_GEN_H -- cgit v1.3.1 From 758a8d5b6a64ad63a8c0728f68dd3e21481013db Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 13 Dec 2023 17:24:55 -0600 Subject: dt-bindings: net: marvell,orion-mdio: Drop "reg" sizes schema Defining the size of register regions is not really in scope of what bindings need to cover. The schema for this is also not completely correct as a reg entry can be variable number of cells for the address and size, but the schema assumes 1 cell. Signed-off-by: Rob Herring Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20231213232455.2248056-1-robh@kernel.org Signed-off-by: Jakub Kicinski --- .../bindings/net/marvell,orion-mdio.yaml | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml index e35da8b01dc2..73429855d584 100644 --- a/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml +++ b/Documentation/devicetree/bindings/net/marvell,orion-mdio.yaml @@ -39,28 +39,6 @@ required: allOf: - $ref: mdio.yaml# - - if: - required: - - interrupts - - then: - properties: - reg: - items: - - items: - - $ref: /schemas/types.yaml#/definitions/cell - - const: 0x84 - - else: - properties: - reg: - items: - - items: - - $ref: /schemas/types.yaml#/definitions/cell - - enum: - - 0x4 - - 0x10 - unevaluatedProperties: false examples: -- cgit v1.3.1 From de2d98743b83cf970f75b7cbf650c08a659121a5 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:09 +0000 Subject: doc/netlink: Add sub-message support to netlink-raw Add a 'sub-message' attribute type with a selector that supports polymorphic attribute formats for raw netlink families like tc. A sub-message attribute uses the value of another attribute as a selector key to choose the right sub-message format. For example if the following attribute has already been decoded: { "kind": "gre" } and we encounter the following attribute spec: - name: data type: sub-message sub-message: linkinfo-data-msg selector: kind Then we look for a sub-message definition called 'linkinfo-data-msg' and use the value of the 'kind' attribute i.e. 'gre' as the key to choose the correct format for the sub-message: sub-messages: name: linkinfo-data-msg formats: - value: bridge attribute-set: linkinfo-bridge-attrs - value: gre attribute-set: linkinfo-gre-attrs - value: geneve attribute-set: linkinfo-geneve-attrs This would decode the attribute value as a sub-message with the attribute-set called 'linkinfo-gre-attrs' as the attribute space. A sub-message can have an optional 'fixed-header' followed by zero or more attributes from an attribute-set. For example the following 'tc-options-msg' sub-message defines message formats that use a mixture of fixed-header, attribute-set or both together: sub-messages: - name: tc-options-msg formats: - value: bfifo fixed-header: tc-fifo-qopt - value: cake attribute-set: tc-cake-attrs - value: netem fixed-header: tc-netem-qopt attribute-set: tc-netem-attrs Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-3-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/netlink-raw.yaml | 65 ++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/netlink-raw.yaml b/Documentation/netlink/netlink-raw.yaml index ad5395040765..04b92f1a5cd6 100644 --- a/Documentation/netlink/netlink-raw.yaml +++ b/Documentation/netlink/netlink-raw.yaml @@ -126,8 +126,10 @@ properties: name: type: string type: - description: The netlink attribute type - enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary ] + description: | + The netlink attribute type. Members of type 'binary' or 'pad' + must also have the 'len' property set. + enum: [ u8, u16, u32, u64, s8, s16, s32, s64, string, binary, pad ] len: $ref: '#/$defs/len-or-define' byte-order: @@ -150,6 +152,14 @@ properties: the right formatting mechanism when displaying values of this type. enum: [ hex, mac, fddi, ipv4, ipv6, uuid ] + if: + properties: + type: + oneOf: + - const: binary + - const: pad + then: + required: [ len ] # End genetlink-legacy attribute-sets: @@ -202,7 +212,8 @@ properties: description: The netlink attribute type enum: [ unused, pad, flag, binary, bitfield32, u8, u16, u32, u64, s8, s16, s32, s64, - string, nest, array-nest, nest-type-value ] + string, nest, array-nest, nest-type-value, + sub-message ] doc: description: Documentation of the attribute. type: string @@ -261,6 +272,17 @@ properties: description: Name of the struct type used for the attribute. type: string # End genetlink-legacy + # Start netlink-raw + sub-message: + description: | + Name of the sub-message definition to use for the attribute. + type: string + selector: + description: | + Name of the attribute to use for dynamic selection of sub-message + format specifier. + type: string + # End netlink-raw # Make sure name-prefix does not appear in subsets (subsets inherit naming) dependencies: @@ -283,6 +305,43 @@ properties: items: required: [ type ] + # Start netlink-raw + sub-messages: + description: Definition of sub message attributes + type: array + items: + type: object + additionalProperties: False + required: [ name, formats ] + properties: + name: + description: Name of the sub-message definition + type: string + formats: + description: Dynamically selected format specifiers + type: array + items: + type: object + additionalProperties: False + required: [ value ] + properties: + value: + description: | + Value to match for dynamic selection of sub-message format + specifier. + type: string + fixed-header: + description: | + Name of the struct definition to use as the fixed header + for the sub message. + type: string + attribute-set: + description: | + Name of the attribute space from which to resolve attributes + in the sub message. + type: string + # End netlink-raw + operations: description: Operations supported by the protocol. type: object -- cgit v1.3.1 From 17ed5c1a9e3674a7e6b3e7bd66824ecd79ecce02 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:10 +0000 Subject: doc/netlink: Document the sub-message format for netlink-raw Document the spec format used by netlink-raw families like rt and tc. Reviewed-by: Jakub Kicinski Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-4-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- .../userspace-api/netlink/netlink-raw.rst | 96 +++++++++++++++++++++- 1 file changed, 95 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/userspace-api/netlink/netlink-raw.rst b/Documentation/userspace-api/netlink/netlink-raw.rst index f07fb9b9c101..1e14f5f22b8e 100644 --- a/Documentation/userspace-api/netlink/netlink-raw.rst +++ b/Documentation/userspace-api/netlink/netlink-raw.rst @@ -14,7 +14,8 @@ Specification The netlink-raw schema extends the :doc:`genetlink-legacy ` schema with properties that are needed to specify the protocol numbers and multicast IDs used by raw netlink families. See :ref:`classic_netlink` for more -information. +information. The raw netlink families also make use of type-specific +sub-messages. Globals ------- @@ -56,3 +57,96 @@ group registration. - name: rtnlgrp-mctp-ifaddr value: 34 + +Sub-messages +------------ + +Several raw netlink families such as +:doc:`rt_link<../../networking/netlink_spec/rt_link>` and +:doc:`tc<../../networking/netlink_spec/tc>` use attribute nesting as an +abstraction to carry module specific information. + +Conceptually it looks as follows:: + + [OUTER NEST OR MESSAGE LEVEL] + [GENERIC ATTR 1] + [GENERIC ATTR 2] + [GENERIC ATTR 3] + [GENERIC ATTR - wrapper] + [MODULE SPECIFIC ATTR 1] + [MODULE SPECIFIC ATTR 2] + +The ``GENERIC ATTRs`` at the outer level are defined in the core (or rt_link or +core TC), while specific drivers, TC classifiers, qdiscs etc. can carry their +own information wrapped in the ``GENERIC ATTR - wrapper``. Even though the +example above shows attributes nesting inside the wrapper, the modules generally +have full freedom to define the format of the nest. In practice the payload of +the wrapper attr has very similar characteristics to a netlink message. It may +contain a fixed header / structure, netlink attributes, or both. Because of +those shared characteristics we refer to the payload of the wrapper attribute as +a sub-message. + +A sub-message attribute uses the value of another attribute as a selector key to +choose the right sub-message format. For example if the following attribute has +already been decoded: + +.. code-block:: json + + { "kind": "gre" } + +and we encounter the following attribute spec: + +.. code-block:: yaml + + - + name: data + type: sub-message + sub-message: linkinfo-data-msg + selector: kind + +Then we look for a sub-message definition called ``linkinfo-data-msg`` and use +the value of the ``kind`` attribute i.e. ``gre`` as the key to choose the +correct format for the sub-message: + +.. code-block:: yaml + + sub-messages: + name: linkinfo-data-msg + formats: + - + value: bridge + attribute-set: linkinfo-bridge-attrs + - + value: gre + attribute-set: linkinfo-gre-attrs + - + value: geneve + attribute-set: linkinfo-geneve-attrs + +This would decode the attribute value as a sub-message with the attribute-set +called ``linkinfo-gre-attrs`` as the attribute space. + +A sub-message can have an optional ``fixed-header`` followed by zero or more +attributes from an ``attribute-set``. For example the following +``tc-options-msg`` sub-message defines message formats that use a mixture of +``fixed-header``, ``attribute-set`` or both together: + +.. code-block:: yaml + + sub-messages: + - + name: tc-options-msg + formats: + - + value: bfifo + fixed-header: tc-fifo-qopt + - + value: cake + attribute-set: tc-cake-attrs + - + value: netem + fixed-header: tc-netem-qopt + attribute-set: tc-netem-attrs + +Note that a selector attribute must appear in a netlink message before any +sub-message attributes that depend on it. -- cgit v1.3.1 From 077b6022d24bef54f72d0aeb81fbeca8e900c94e Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:13 +0000 Subject: doc/netlink/specs: Add sub-message type to rt_link family Start using sub-message selectors in the rt_link spec for the link-specific 'data' and 'slave-data' attributes. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-7-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 436 ++++++++++++++++++++++++++++++- 1 file changed, 432 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index d86a68f8475c..ea6a6157d718 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -82,6 +82,18 @@ definitions: - name: ifi-change type: u32 + - + name: ifla-bridge-id + type: struct + members: + - + name: prio + type: u16 + - + name: addr + type: binary + len: 6 + display-hint: mac - name: ifla-cacheinfo type: struct @@ -966,8 +978,9 @@ attribute-sets: type: string - name: data - type: binary - # kind specific nest, e.g. linkinfo-bridge-attrs + type: sub-message + sub-message: linkinfo-data-msg + selector: kind - name: xstats type: binary @@ -976,10 +989,12 @@ attribute-sets: type: string - name: slave-data - type: binary - # kind specific nest + type: sub-message + sub-message: linkinfo-member-data-msg + selector: slave-kind - name: linkinfo-bridge-attrs + name-prefix: ifla-br- attributes: - name: forward-delay @@ -1011,9 +1026,11 @@ attribute-sets: - name: root-id type: binary + struct: ifla-bridge-id - name: bridge-id type: binary + struct: ifla-bridge-id - name: root-port type: u16 @@ -1041,6 +1058,7 @@ attribute-sets: - name: group-addr type: binary + display-hint: mac - name: fdb-flush type: binary @@ -1123,6 +1141,376 @@ attribute-sets: - name: mcast-querier-state type: binary + - + name: linkinfo-brport-attrs + name-prefix: ifla-brport- + attributes: + - + name: state + type: u8 + - + name: priority + type: u16 + - + name: cost + type: u32 + - + name: mode + type: flag + - + name: guard + type: flag + - + name: protect + type: flag + - + name: fast-leave + type: flag + - + name: learning + type: flag + - + name: unicast-flood + type: flag + - + name: proxyarp + type: flag + - + name: learning-sync + type: flag + - + name: proxyarp-wifi + type: flag + - + name: root-id + type: binary + struct: ifla-bridge-id + - + name: bridge-id + type: binary + struct: ifla-bridge-id + - + name: designated-port + type: u16 + - + name: designated-cost + type: u16 + - + name: id + type: u16 + - + name: "no" + type: u16 + - + name: topology-change-ack + type: u8 + - + name: config-pending + type: u8 + - + name: message-age-timer + type: u64 + - + name: forward-delay-timer + type: u64 + - + name: hold-timer + type: u64 + - + name: flush + type: flag + - + name: multicast-router + type: u8 + - + name: pad + type: pad + - + name: mcast-flood + type: flag + - + name: mcast-to-ucast + type: flag + - + name: vlan-tunnel + type: flag + - + name: bcast-flood + type: flag + - + name: group-fwd-mask + type: u16 + - + name: neigh-suppress + type: flag + - + name: isolated + type: flag + - + name: backup-port + type: u32 + - + name: mrp-ring-open + type: flag + - + name: mrp-in-open + type: flag + - + name: mcast-eht-hosts-limit + type: u32 + - + name: mcast-eht-hosts-cnt + type: u32 + - + name: locked + type: flag + - + name: mab + type: flag + - + name: mcast-n-groups + type: u32 + - + name: mcast-max-groups + type: u32 + - + name: neigh-vlan-suppress + type: flag + - + name: backup-nhid + type: u32 + - + name: linkinfo-gre-attrs + name-prefix: ifla-gre- + attributes: + - + name: link + type: u32 + - + name: iflags + type: u16 + - + name: oflags + type: u16 + - + name: ikey + type: u32 + - + name: okey + type: u32 + - + name: local + type: binary + display-hint: ipv4 + - + name: remote + type: binary + display-hint: ipv4 + - + name: ttl + type: u8 + - + name: tos + type: u8 + - + name: pmtudisc + type: u8 + - + name: encap-limit + type: u32 + - + name: flowinfo + type: u32 + - + name: flags + type: u32 + - + name: encap-type + type: u16 + - + name: encap-flags + type: u16 + - + name: encap-sport + type: u16 + - + name: encap-dport + type: u16 + - + name: collect-metadata + type: flag + - + name: ignore-df + type: u8 + - + name: fwmark + type: u32 + - + name: erspan-index + type: u32 + - + name: erspan-ver + type: u8 + - + name: erspan-dir + type: u8 + - + name: erspan-hwid + type: u16 + - + name: linkinfo-geneve-attrs + name-prefix: ifla-geneve- + attributes: + - + name: id + type: u32 + - + name: remote + type: binary + display-hint: ipv4 + - + name: ttl + type: u8 + - + name: tos + type: u8 + - + name: port + type: u16 + - + name: collect-metadata + type: flag + - + name: remote6 + type: binary + display-hint: ipv6 + - + name: udp-csum + type: u8 + - + name: udp-zero-csum6-tx + type: u8 + - + name: udp-zero-csum6-rx + type: u8 + - + name: label + type: u32 + - + name: ttl-inherit + type: u8 + - + name: df + type: u8 + - + name: inner-proto-inherit + type: flag + - + name: linkinfo-iptun-attrs + name-prefix: ifla-iptun- + attributes: + - + name: link + type: u32 + - + name: local + type: binary + display-hint: ipv4 + - + name: remote + type: binary + display-hint: ipv4 + - + name: ttl + type: u8 + - + name: tos + type: u8 + - + name: encap-limit + type: u8 + - + name: flowinfo + type: u32 + - + name: flags + type: u16 + - + name: proto + type: u8 + - + name: pmtudisc + type: u8 + - + name: 6rd-prefix + type: binary + display-hint: ipv6 + - + name: 6rd-relay-prefix + type: binary + display-hint: ipv4 + - + name: 6rd-prefixlen + type: u16 + - + name: 6rd-relay-prefixlen + type: u16 + - + name: encap-type + type: u16 + - + name: encap-flags + type: u16 + - + name: encap-sport + type: u16 + - + name: encap-dport + type: u16 + - + name: collect-metadata + type: flag + - + name: fwmark + type: u32 + - + name: linkinfo-tun-attrs + name-prefix: ifla-tun- + attributes: + - + name: owner + type: u32 + - + name: group + type: u32 + - + name: type + type: u8 + - + name: pi + type: u8 + - + name: vnet-hdr + type: u8 + - + name: persist + type: u8 + - + name: multi-queue + type: u8 + - + name: num-queues + type: u32 + - + name: num-disabled-queues + type: u32 + - + name: linkinfo-vrf-attrs + name-prefix: ifla-vrf- + attributes: + - + name: table + type: u32 - name: xdp-attrs attributes: @@ -1241,6 +1629,46 @@ attribute-sets: name: used type: u8 +sub-messages: + - + name: linkinfo-data-msg + formats: + - + value: bridge + attribute-set: linkinfo-bridge-attrs + - + value: erspan + attribute-set: linkinfo-gre-attrs + - + value: gre + attribute-set: linkinfo-gre-attrs + - + value: gretap + attribute-set: linkinfo-gre-attrs + - + value: geneve + attribute-set: linkinfo-geneve-attrs + - + value: ipip + attribute-set: linkinfo-iptun-attrs + - + value: sit + attribute-set: linkinfo-iptun-attrs + - + value: tun + attribute-set: linkinfo-tun-attrs + - + value: vrf + attribute-set: linkinfo-vrf-attrs + - + name: linkinfo-member-data-msg + formats: + - + value: bridge + attribute-set: linkinfo-brport-attrs + - + value: bond + operations: enum-model: directional list: -- cgit v1.3.1 From 6b4b0754ef8a3fb3c5f772cf6c70bd33936b6431 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:14 +0000 Subject: doc/netlink/specs: use pad in structs in rt_link The rt_link spec was using pad1, pad2 attributes in structs which appears in the ynl output. Replace this with the 'pad' type which doesn't pollute the output. Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-8-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/rt_link.yaml | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/rt_link.yaml b/Documentation/netlink/specs/rt_link.yaml index ea6a6157d718..1ad01d52a863 100644 --- a/Documentation/netlink/specs/rt_link.yaml +++ b/Documentation/netlink/specs/rt_link.yaml @@ -66,8 +66,9 @@ definitions: name: ifi-family type: u8 - - name: padding - type: u8 + name: pad + type: pad + len: 1 - name: ifi-type type: u16 @@ -719,11 +720,9 @@ definitions: name: family type: u8 - - name: pad1 - type: u8 - - - name: pad2 - type: u16 + name: pad + type: pad + len: 3 - name: ifindex type: u32 -- cgit v1.3.1 From a1bcfde83669accf5890ff33e4ba5f3ccbbc3047 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:15 +0000 Subject: doc/netlink/specs: Add a spec for tc This is a work-in-progress spec for tc that covers: - most of the qdiscs - the flower classifier - new, del, get for qdisc, chain, class and filter Notable omissions: - most of the stats attrs are left as binary blobs - notifications are not yet implemented Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-9-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/tc.yaml | 2031 +++++++++++++++++++++++++++++++++++ 1 file changed, 2031 insertions(+) create mode 100644 Documentation/netlink/specs/tc.yaml (limited to 'Documentation') diff --git a/Documentation/netlink/specs/tc.yaml b/Documentation/netlink/specs/tc.yaml new file mode 100644 index 000000000000..4346fa402fc9 --- /dev/null +++ b/Documentation/netlink/specs/tc.yaml @@ -0,0 +1,2031 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: tc +protocol: netlink-raw +protonum: 0 + +doc: + Netlink raw family for tc qdisc, chain, class and filter configuration + over rtnetlink. + +definitions: + - + name: tcmsg + type: struct + members: + - + name: family + type: u8 + - + name: pad + type: pad + len: 3 + - + name: ifindex + type: s32 + - + name: handle + type: u32 + - + name: parent + type: u32 + - + name: info + type: u32 + - + name: tc-cls-flags + type: flags + entries: + - skip-hw + - skip-sw + - in-hw + - not-in-nw + - verbose + - + name: tc-stats + type: struct + members: + - + name: bytes + type: u64 + - + name: packets + type: u32 + - + name: drops + type: u32 + - + name: overlimits + type: u32 + - + name: bps + type: u32 + - + name: pps + type: u32 + - + name: qlen + type: u32 + - + name: backlog + type: u32 + - + name: tc-cbs-qopt + type: struct + members: + - + name: offload + type: u8 + - + name: pad + type: pad + len: 3 + - + name: hicredit + type: s32 + - + name: locredit + type: s32 + - + name: idleslope + type: s32 + - + name: sendslope + type: s32 + - + name: tc-etf-qopt + type: struct + members: + - + name: delta + type: s32 + - + name: clockid + type: s32 + - + name: flags + type: s32 + - + name: tc-fifo-qopt + type: struct + members: + - + name: limit + type: u32 + - + name: tc-htb-opt + type: struct + members: + - + name: rate + type: binary + len: 12 + - + name: ceil + type: binary + len: 12 + - + name: buffer + type: u32 + - + name: cbuffer + type: u32 + - + name: quantum + type: u32 + - + name: level + type: u32 + - + name: prio + type: u32 + - + name: tc-htb-glob + type: struct + members: + - + name: version + type: u32 + - + name: rate2quantum + type: u32 + - + name: defcls + type: u32 + - + name: debug + type: u32 + - + name: direct-pkts + type: u32 + - + name: tc-gred-qopt + type: struct + members: + - + name: limit + type: u32 + - + name: qth-min + type: u32 + - + name: qth-max + type: u32 + - + name: DP + type: u32 + - + name: backlog + type: u32 + - + name: qave + type: u32 + - + name: forced + type: u32 + - + name: early + type: u32 + - + name: other + type: u32 + - + name: pdrop + type: u32 + - + name: Wlog + type: u8 + - + name: Plog + type: u8 + - + name: Scell_log + type: u8 + - + name: prio + type: u8 + - + name: packets + type: u32 + - + name: bytesin + type: u32 + - + name: tc-gred-sopt + type: struct + members: + - + name: DPs + type: u32 + - + name: def_DP + type: u32 + - + name: grio + type: u8 + - + name: flags + type: u8 + - + name: pad + type: pad + len: 2 + - + name: tc-hfsc-qopt + type: struct + members: + - + name: defcls + type: u16 + - + name: tc-mqprio-qopt + type: struct + members: + - + name: num-tc + type: u8 + - + name: prio-tc-map + type: binary + len: 16 + - + name: hw + type: u8 + - + name: count + type: binary + len: 32 + - + name: offset + type: binary + len: 32 + - + name: tc-multiq-qopt + type: struct + members: + - + name: bands + type: u16 + - + name: max-bands + type: u16 + - + name: tc-netem-qopt + type: struct + members: + - + name: latency + type: u32 + - + name: limit + type: u32 + - + name: loss + type: u32 + - + name: gap + type: u32 + - + name: duplicate + type: u32 + - + name: jitter + type: u32 + - + name: tc-plug-qopt + type: struct + members: + - + name: action + type: s32 + - + name: limit + type: u32 + - + name: tc-prio-qopt + type: struct + members: + - + name: bands + type: u16 + - + name: priomap + type: binary + len: 16 + - + name: tc-red-qopt + type: struct + members: + - + name: limit + type: u32 + - + name: qth-min + type: u32 + - + name: qth-max + type: u32 + - + name: Wlog + type: u8 + - + name: Plog + type: u8 + - + name: Scell-log + type: u8 + - + name: flags + type: u8 + - + name: tc-sfb-qopt + type: struct + members: + - + name: rehash-interval + type: u32 + - + name: warmup-time + type: u32 + - + name: max + type: u32 + - + name: bin-size + type: u32 + - + name: increment + type: u32 + - + name: decrement + type: u32 + - + name: limit + type: u32 + - + name: penalty-rate + type: u32 + - + name: penalty-burst + type: u32 + - + name: tc-sfq-qopt-v1 # TODO nested structs + type: struct + members: + - + name: quantum + type: u32 + - + name: perturb-period + type: s32 + - + name: limit + type: u32 + - + name: divisor + type: u32 + - + name: flows + type: u32 + - + name: depth + type: u32 + - + name: headdrop + type: u32 + - + name: limit + type: u32 + - + name: qth-min + type: u32 + - + name: qth-mac + type: u32 + - + name: Wlog + type: u8 + - + name: Plog + type: u8 + - + name: Scell-log + type: u8 + - + name: flags + type: u8 + - + name: max-P + type: u32 + - + name: prob-drop + type: u32 + - + name: forced-drop + type: u32 + - + name: prob-mark + type: u32 + - + name: forced-mark + type: u32 + - + name: prob-mark-head + type: u32 + - + name: forced-mark-head + type: u32 + - + name: tc-tbf-qopt + type: struct + members: + - + name: rate + type: binary # TODO nested struct tc_ratespec + len: 12 + - + name: peakrate + type: binary # TODO nested struct tc_ratespec + len: 12 + - + name: limit + type: u32 + - + name: buffer + type: u32 + - + name: mtu + type: u32 + - + name: tc-sizespec + type: struct + members: + - + name: cell-log + type: u8 + - + name: size-log + type: u8 + - + name: cell-align + type: s16 + - + name: overhead + type: s32 + - + name: linklayer + type: u32 + - + name: mpu + type: u32 + - + name: mtu + type: u32 + - + name: tsize + type: u32 + - + name: gnet-estimator + type: struct + members: + - + name: interval + type: s8 + - + name: ewma-log + type: u8 +attribute-sets: + - + name: tc-attrs + attributes: + - + name: kind + type: string + - + name: options + type: sub-message + sub-message: tc-options-msg + selector: kind + - + name: stats + type: binary + struct: tc-stats + - + name: xstats + type: binary + - + name: rate + type: binary + struct: gnet-estimator + - + name: fcnt + type: u32 + - + name: stats2 + type: nest + nested-attributes: tca-stats-attrs + - + name: stab + type: nest + nested-attributes: tca-stab-attrs + - + name: pad + type: pad + - + name: dump-invisible + type: flag + - + name: chain + type: u32 + - + name: hw-offload + type: u8 + - + name: ingress-block + type: u32 + - + name: egress-block + type: u32 + - + name: dump-flags + type: bitfield32 + - + name: ext-warn-msg + type: string + - + name: tc-cake-attrs + attributes: + - + name: pad + type: pad + - + name: base-rate64 + type: u64 + - + name: diffserv-mode + type: u32 + - + name: atm + type: u32 + - + name: flow-mode + type: u32 + - + name: overhead + type: u32 + - + name: rtt + type: u32 + - + name: target + type: u32 + - + name: autorate + type: u32 + - + name: memory + type: u32 + - + name: nat + type: u32 + - + name: raw + type: u32 + - + name: wash + type: u32 + - + name: mpu + type: u32 + - + name: ingress + type: u32 + - + name: ack-filter + type: u32 + - + name: split-gso + type: u32 + - + name: fwmark + type: u32 + - + name: tc-cake-stats-attrs + attributes: + - + name: pad + type: pad + - + name: capacity-estimate64 + type: u64 + - + name: memory-limit + type: u32 + - + name: memory-used + type: u32 + - + name: avg-netoff + type: u32 + - + name: min-netlen + type: u32 + - + name: max-netlen + type: u32 + - + name: min-adjlen + type: u32 + - + name: max-adjlen + type: u32 + - + name: tin-stats + type: binary + - + name: deficit + type: s32 + - + name: cobalt-count + type: u32 + - + name: dropping + type: u32 + - + name: drop-next-us + type: s32 + - + name: p-drop + type: u32 + - + name: blue-timer-us + type: s32 + - + name: tc-cbs-attrs + attributes: + - + name: parms + type: binary + struct: tc-cbs-qopt + - + name: tc-choke-attrs + attributes: + - + name: parms + type: binary + struct: tc-red-qopt + - + name: stab + type: binary + - + name: max-p + type: u32 + - + name: tc-codel-attrs + attributes: + - + name: target + type: u32 + - + name: limit + type: u32 + - + name: interval + type: u32 + - + name: ecn + type: u32 + - + name: ce-threshold + type: u32 + - + name: tc-drr-attrs + attributes: + - + name: quantum + type: u32 + - + name: tc-flower-attrs + attributes: + - + name: classid + type: u32 + - + name: indev + type: string + - + name: act + type: array-nest + nested-attributes: tc-act-attrs + - + name: key-eth-dst + type: binary + display-hint: mac + - + name: key-eth-dst-mask + type: binary + display-hint: mac + - + name: key-eth-src + type: binary + display-hint: mac + - + name: key-eth-src-mask + type: binary + display-hint: mac + - + name: key-eth-type + type: u16 + byte-order: big-endian + - + name: key-ip-proto + type: u8 + - + name: key-ipv4-src + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-ipv4-src-mask + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-ipv4-dst + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-ipv4-dst-mask + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-ipv6-src + type: binary + display-hint: ipv6 + - + name: key-ipv6-src-mask + type: binary + display-hint: ipv6 + - + name: key-ipv6-dst + type: binary + display-hint: ipv6 + - + name: key-ipv6-dst-mask + type: binary + display-hint: ipv6 + - + name: key-tcp-src + type: u16 + byte-order: big-endian + - + name: key-tcp-dst + type: u16 + byte-order: big-endian + - + name: key-udp-src + type: u16 + byte-order: big-endian + - + name: key-udp-dst + type: u16 + byte-order: big-endian + - + name: flags + type: u32 + enum: tc-cls-flags + enum-as-flags: true + - + name: key-vlan-id + type: u16 + byte-order: big-endian + - + name: key-vlan-prio + type: u8 + - + name: key-vlan-eth-type + type: u16 + byte-order: big-endian + - + name: key-enc-key-id + type: u32 + byte-order: big-endian + - + name: key-enc-ipv4-src + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-enc-ipv4-src-mask + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-enc-ipv4-dst + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-enc-ipv4-dst-mask + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: key-enc-ipv6-src + type: binary + display-hint: ipv6 + - + name: key-enc-ipv6-src-mask + type: binary + display-hint: ipv6 + - + name: key-enc-ipv6-dst + type: binary + display-hint: ipv6 + - + name: key-enc-ipv6-dst-mask + type: binary + display-hint: ipv6 + - + name: key-tcp-src-mask + type: u16 + byte-order: big-endian + - + name: key-tcp-dst-mask + type: u16 + byte-order: big-endian + - + name: key-udp-src-mask + type: u16 + byte-order: big-endian + - + name: key-udp-dst-mask + type: u16 + byte-order: big-endian + - + name: key-sctp-src-mask + type: u16 + byte-order: big-endian + - + name: key-sctp-dst-mask + type: u16 + byte-order: big-endian + - + name: key-sctp-src + type: u16 + byte-order: big-endian + - + name: key-sctp-dst + type: u16 + byte-order: big-endian + - + name: key-enc-udp-src-port + type: u16 + byte-order: big-endian + - + name: key-enc-udp-src-port-mask + type: u16 + byte-order: big-endian + - + name: key-enc-udp-dst-port + type: u16 + byte-order: big-endian + - + name: key-enc-udp-dst-port-mask + type: u16 + byte-order: big-endian + - + name: key-flags + type: u32 + byte-order: big-endian + - + name: key-flags-mask + type: u32 + byte-order: big-endian + - + name: key-icmpv4-code + type: u8 + - + name: key-icmpv4-code-mask + type: u8 + - + name: key-icmpv4-type + type: u8 + - + name: key-icmpv4-type-mask + type: u8 + - + name: key-icmpv6-code + type: u8 + - + name: key-icmpv6-code-mask + type: u8 + - + name: key-icmpv6-type + type: u8 + - + name: key-icmpv6-type-mask + type: u8 + - + name: key-arp-sip + type: u32 + byte-order: big-endian + - + name: key-arp-sip-mask + type: u32 + byte-order: big-endian + - + name: key-arp-tip + type: u32 + byte-order: big-endian + - + name: key-arp-tip-mask + type: u32 + byte-order: big-endian + - + name: key-arp-op + type: u8 + - + name: key-arp-op-mask + type: u8 + - + name: key-arp-sha + type: binary + - + name: key-arp-sha-mask + type: binary + - + name: key-arp-tha + type: binary + - + name: key-arp-tha-mask + type: binary + - + name: key-mpls-ttl + type: u8 + - + name: key-mpls-bos + type: u8 + - + name: key-mpls-tc + type: u8 + - + name: key-mpls-label + type: u32 + byte-order: big-endian + - + name: key-tcp-flags + type: u16 + byte-order: big-endian + - + name: key-tcp-flags-mask + type: u16 + byte-order: big-endian + - + name: key-ip-tos + type: u8 + - + name: key-ip-tos-mask + type: u8 + - + name: key-ip-ttl + type: u8 + - + name: key-ip-ttl-mask + type: u8 + - + name: key-cvlan-id + type: u16 + byte-order: big-endian + - + name: key-cvlan-prio + type: u8 + - + name: key-cvlan-eth-type + type: u16 + byte-order: big-endian + - + name: key-enc-ip-tos + type: u8 + - + name: key-enc-ip-tos-mask + type: u8 + - + name: key-enc-ip-ttl + type: u8 + - + name: key-enc-ip-ttl-mask + type: u8 + - + name: key-enc-opts + type: binary + - + name: key-enc-opts-mask + type: binary + - + name: in-hw-count + type: u32 + - + name: key-port-src-min + type: u16 + byte-order: big-endian + - + name: key-port-src-max + type: u16 + byte-order: big-endian + - + name: key-port-dst-min + type: u16 + byte-order: big-endian + - + name: key-port-dst-max + type: u16 + byte-order: big-endian + - + name: key-ct-state + type: u16 + - + name: key-ct-state-mask + type: u16 + - + name: key-ct-zone + type: u16 + - + name: key-ct-zone-mask + type: u16 + - + name: key-ct-mark + type: u32 + - + name: key-ct-mark-mask + type: u32 + - + name: key-ct-labels + type: binary + - + name: key-ct-labels-mask + type: binary + - + name: key-mpls-opts + type: binary + - + name: key-hash + type: u32 + - + name: key-hash-mask + type: u32 + - + name: key-num-of-vlans + type: u8 + - + name: key-pppoe-sid + type: u16 + byte-order: big-endian + - + name: key-ppp-proto + type: u16 + byte-order: big-endian + - + name: key-l2-tpv3-sid + type: u32 + byte-order: big-endian + - + name: tc-gred-attrs + attributes: + - + name: parms + type: binary # array of struct: tc-gred-qopt + - + name: stab + type: binary + sub-type: u8 + - + name: dps + type: binary + struct: tc-gred-sopt + - + name: max-p + type: binary + sub-type: u32 + - + name: limit + type: u32 + - + name: vq-list + type: nest + nested-attributes: tca-gred-vq-list-attrs + - + name: tca-gred-vq-list-attrs + attributes: + - + name: entry + type: nest + nested-attributes: tca-gred-vq-entry-attrs + multi-attr: true + - + name: tca-gred-vq-entry-attrs + attributes: + - + name: pad + type: pad + - + name: dp + type: u32 + - + name: stat-bytes + type: u32 + - + name: stat-packets + type: u32 + - + name: stat-backlog + type: u32 + - + name: stat-prob-drop + type: u32 + - + name: stat-prob-mark + type: u32 + - + name: stat-forced-drop + type: u32 + - + name: stat-forced-mark + type: u32 + - + name: stat-pdrop + type: u32 + - + name: stat-other + type: u32 + - + name: flags + type: u32 + - + name: tc-hfsc-attrs + attributes: + - + name: rsc + type: binary + - + name: fsc + type: binary + - + name: usc + type: binary + - + name: tc-hhf-attrs + attributes: + - + name: backlog-limit + type: u32 + - + name: quantum + type: u32 + - + name: hh-flows-limit + type: u32 + - + name: reset-timeout + type: u32 + - + name: admit-bytes + type: u32 + - + name: evict-timeout + type: u32 + - + name: non-hh-weight + type: u32 + - + name: tc-htb-attrs + attributes: + - + name: parms + type: binary + struct: tc-htb-opt + - + name: init + type: binary + struct: tc-htb-glob + - + name: ctab + type: binary + - + name: rtab + type: binary + - + name: direct-qlen + type: u32 + - + name: rate64 + type: u64 + - + name: ceil64 + type: u64 + - + name: pad + type: pad + - + name: offload + type: flag + - + name: tc-act-attrs + attributes: + - + name: kind + type: string + - + name: options + type: sub-message + sub-message: tc-act-options-msg + selector: kind + - + name: index + type: u32 + - + name: stats + type: binary + - + name: pad + type: pad + - + name: cookie + type: binary + - + name: flags + type: bitfield32 + - + name: hw-stats + type: bitfield32 + - + name: used-hw-stats + type: bitfield32 + - + name: in-hw-count + type: u32 + - + name: tc-etf-attrs + attributes: + - + name: parms + type: binary + struct: tc-etf-qopt + - + name: tc-ets-attrs + attributes: + - + name: nbands + type: u8 + - + name: nstrict + type: u8 + - + name: quanta + type: nest + nested-attributes: tc-ets-attrs + - + name: quanta-band + type: u32 + multi-attr: true + - + name: priomap + type: nest + nested-attributes: tc-ets-attrs + - + name: priomap-band + type: u8 + multi-attr: true + - + name: tc-fq-attrs + attributes: + - + name: plimit + type: u32 + - + name: flow-plimit + type: u32 + - + name: quantum + type: u32 + - + name: initial-quantum + type: u32 + - + name: rate-enable + type: u32 + - + name: flow-default-rate + type: u32 + - + name: flow-max-rate + type: u32 + - + name: buckets-log + type: u32 + - + name: flow-refill-delay + type: u32 + - + name: orphan-mask + type: u32 + - + name: low-rate-threshold + type: u32 + - + name: ce-threshold + type: u32 + - + name: timer-slack + type: u32 + - + name: horizon + type: u32 + - + name: horizon-drop + type: u8 + - + name: tc-fq-codel-attrs + attributes: + - + name: target + type: u32 + - + name: limit + type: u32 + - + name: interval + type: u32 + - + name: ecn + type: u32 + - + name: flows + type: u32 + - + name: quantum + type: u32 + - + name: ce-threshold + type: u32 + - + name: drop-batch-size + type: u32 + - + name: memory-limit + type: u32 + - + name: ce-threshold-selector + type: u8 + - + name: ce-threshold-mask + type: u8 + - + name: tc-fq-pie-attrs + attributes: + - + name: limit + type: u32 + - + name: flows + type: u32 + - + name: target + type: u32 + - + name: tupdate + type: u32 + - + name: alpha + type: u32 + - + name: beta + type: u32 + - + name: quantum + type: u32 + - + name: memory-limit + type: u32 + - + name: ecn-prob + type: u32 + - + name: ecn + type: u32 + - + name: bytemode + type: u32 + - + name: dq-rate-estimator + type: u32 + - + name: tc-netem-attrs + attributes: + - + name: corr + type: binary + - + name: delay-dist + type: binary + sub-type: s16 + - + name: reorder + type: binary + - + name: corrupt + type: binary + - + name: loss + type: binary + - + name: rate + type: binary + - + name: ecn + type: u32 + - + name: rate64 + type: u64 + - + name: pad + type: u32 + - + name: latency64 + type: s64 + - + name: jitter64 + type: s64 + - + name: slot + type: binary + - + name: slot-dist + type: binary + sub-type: s16 + - + name: tc-pie-attrs + attributes: + - + name: target + type: u32 + - + name: limit + type: u32 + - + name: tupdate + type: u32 + - + name: alpha + type: u32 + - + name: beta + type: u32 + - + name: ecn + type: u32 + - + name: bytemode + type: u32 + - + name: dq-rate-estimator + type: u32 + - + name: tc-qfq-attrs + attributes: + - + name: weight + type: u32 + - + name: lmax + type: u32 + - + name: tc-red-attrs + attributes: + - + name: parms + type: binary + struct: tc-red-qopt + - + name: stab + type: binary + - + name: max-p + type: u32 + - + name: flags + type: binary + - + name: early-drop-block + type: u32 + - + name: mark-block + type: u32 + - + name: tc-taprio-attrs + attributes: + - + name: priomap + type: binary + struct: tc-mqprio-qopt + - + name: sched-entry-list + type: nest + nested-attributes: tc-taprio-sched-entry-list + - + name: sched-base-time + type: s64 + - + name: sched-single-entry + type: nest + nested-attributes: tc-taprio-sched-entry + - + name: sched-clockid + type: s32 + - + name: pad + type: pad + - + name: admin-sched + type: binary + - + name: sched-cycle-time + type: s64 + - + name: sched-cycle-time-extension + type: s64 + - + name: flags + type: u32 + - + name: txtime-delay + type: u32 + - + name: tc-entry + type: nest + nested-attributes: tc-taprio-tc-entry-attrs + - + name: tc-taprio-sched-entry-list + attributes: + - + name: entry + type: nest + nested-attributes: tc-taprio-sched-entry + - + name: tc-taprio-sched-entry + attributes: + - + name: index + type: u32 + - + name: cmd + type: u8 + - + name: gate-mask + type: u32 + - + name: interval + type: u32 + - + name: tc-taprio-tc-entry-attrs + attributes: + - + name: index + type: u32 + - + name: max-sdu + type: u32 + - + name: fp + type: u32 + - + name: tc-tbf-attrs + attributes: + - + name: parms + type: binary + struct: tc-tbf-qopt + - + name: rtab + type: binary + - + name: ptab + type: binary + - + name: rate64 + type: u64 + - + name: prate4 + type: u64 + - + name: burst + type: u32 + - + name: pburst + type: u32 + - + name: pad + type: pad + - + name: tca-gact-attrs + attributes: + - + name: tm + type: binary + - + name: parms + type: binary + - + name: prob + type: binary + - + name: pad + type: pad + - + name: tca-stab-attrs + attributes: + - + name: base + type: binary + struct: tc-sizespec + - + name: data + type: binary + - + name: tca-stats-attrs + attributes: + - + name: basic + type: binary + - + name: rate-est + type: binary + - + name: queue + type: binary + - + name: app + type: binary # TODO sub-message needs 2+ level deep lookup + sub-message: tca-stats-app-msg + selector: kind + - + name: rate-est64 + type: binary + - + name: pad + type: pad + - + name: basic-hw + type: binary + - + name: pkt64 + type: binary + +sub-messages: + - + name: tc-options-msg + formats: + - + value: bfifo + fixed-header: tc-fifo-qopt + - + value: cake + attribute-set: tc-cake-attrs + - + value: cbs + attribute-set: tc-cbs-attrs + - + value: choke + attribute-set: tc-choke-attrs + - + value: clsact # no content + - + value: codel + attribute-set: tc-codel-attrs + - + value: drr + attribute-set: tc-drr-attrs + - + value: etf + attribute-set: tc-etf-attrs + - + value: ets + attribute-set: tc-ets-attrs + - + value: fq + attribute-set: tc-fq-attrs + - + value: fq_codel + attribute-set: tc-fq-codel-attrs + - + value: fq_pie + attribute-set: tc-fq-pie-attrs + - + value: flower + attribute-set: tc-flower-attrs + - + value: gred + attribute-set: tc-gred-attrs + - + value: hfsc + fixed-header: tc-hfsc-qopt + - + value: hhf + attribute-set: tc-hhf-attrs + - + value: htb + attribute-set: tc-htb-attrs + - + value: ingress # no content + - + value: mq # no content + - + value: mqprio + fixed-header: tc-mqprio-qopt + - + value: multiq + fixed-header: tc-multiq-qopt + - + value: netem + fixed-header: tc-netem-qopt + attribute-set: tc-netem-attrs + - + value: pfifo + fixed-header: tc-fifo-qopt + - + value: pfifo_fast + fixed-header: tc-prio-qopt + - + value: pfifo_head_drop + fixed-header: tc-fifo-qopt + - + value: pie + attribute-set: tc-pie-attrs + - + value: plug + fixed-header: tc-plug-qopt + - + value: prio + fixed-header: tc-prio-qopt + - + value: qfq + attribute-set: tc-qfq-attrs + - + value: red + attribute-set: tc-red-attrs + - + value: sfb + fixed-header: tc-sfb-qopt + - + value: sfq + fixed-header: tc-sfq-qopt-v1 + - + value: taprio + attribute-set: tc-taprio-attrs + - + value: tbf + attribute-set: tc-tbf-attrs + - + name: tc-act-options-msg + formats: + - + value: gact + attribute-set: tca-gact-attrs + - + name: tca-stats-app-msg + formats: + - + value: bfifo + - + value: blackhole + - + value: cake + attribute-set: tc-cake-stats-attrs + - + value: cbs + - + value: choke + - + value: clsact + - + value: codel + - + value: drr + - + value: etf + - + value: ets + - + value: fq + - + value: fq_codel + - + value: fq_pie + - + value: flower + - + value: gred + - + value: hfsc + - + value: hhf + - + value: htb + - + value: ingress + - + value: mq + - + value: mqprio + - + value: multiq + - + value: netem + - + value: noqueue + - + value: pfifo + - + value: pfifo_fast + - + value: pfifo_head_drop + - + value: pie + - + value: plug + - + value: prio + - + value: qfq + - + value: red + - + value: sfb + - + value: sfq + - + value: taprio + - + value: tbf + +operations: + enum-model: directional + list: + - + name: newqdisc + doc: Create new tc qdisc. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 36 + attributes: &create-params + - kind + - options + - rate + - chain + - ingress-block + - egress-block + - + name: delqdisc + doc: Delete existing tc qdisc. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 37 + - + name: getqdisc + doc: Get / dump tc qdisc information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 38 + attributes: + - dump-invisible + reply: + value: 36 + attributes: &tc-all + - kind + - options + - stats + - xstats + - rate + - fcnt + - stats2 + - stab + - chain + - ingress-block + - egress-block + - + name: newtclass + doc: Get / dump tc traffic class information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 40 + attributes: *create-params + - + name: deltclass + doc: Get / dump tc traffic class information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 41 + - + name: gettclass + doc: Get / dump tc traffic class information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 42 + reply: + value: 40 + attributes: *tc-all + - + name: newtfilter + doc: Get / dump tc filter information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 44 + attributes: *create-params + - + name: deltfilter + doc: Get / dump tc filter information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 45 + attributes: + - chain + - kind + - + name: gettfilter + doc: Get / dump tc filter information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 46 + attributes: + - chain + - kind + reply: + value: 44 + attributes: *tc-all + dump: + request: + value: 46 + attributes: + - chain + - dump-flags + reply: + value: 44 + attributes: *tc-all + - + name: newchain + doc: Get / dump tc chain information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 100 + attributes: *create-params + - + name: delchain + doc: Get / dump tc chain information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 101 + attributes: + - chain + - + name: getchain + doc: Get / dump tc chain information. + attribute-set: tc-attrs + fixed-header: tcmsg + do: + request: + value: 102 + attributes: + - chain + reply: + value: 100 + attributes: *tc-all + +mcast-groups: + list: + - + name: rtnlgrp-tc + value: 4 -- cgit v1.3.1 From 646158f20cbc4f93f5318f8adee0f3f5b92ff6a8 Mon Sep 17 00:00:00 2001 From: Donald Hunter Date: Fri, 15 Dec 2023 09:37:16 +0000 Subject: doc/netlink: Regenerate netlink .rst files if ynl-gen-rst changes Add ynl-gen-rst.py to the dependencies for the netlink .rst files in the doc Makefile so that the docs get regenerated if the ynl-gen-rst.py script is modified. Use $(Q) to honour V=1 in the rules that run ynl-gen-rst.py Reviewed-by: Jakub Kicinski Reviewed-by: Breno Leitao Signed-off-by: Donald Hunter Link: https://lore.kernel.org/r/20231215093720.18774-10-donald.hunter@gmail.com Signed-off-by: Jakub Kicinski --- Documentation/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/Makefile b/Documentation/Makefile index 5c156fbb6cdf..3885bbe260eb 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -106,10 +106,10 @@ YNL_RST_FILES_TMP := $(patsubst %.yaml,%.rst,$(wildcard $(YNL_YAML_DIR)/*.yaml)) YNL_RST_FILES := $(patsubst $(YNL_YAML_DIR)%,$(YNL_RST_DIR)%, $(YNL_RST_FILES_TMP)) $(YNL_INDEX): $(YNL_RST_FILES) - @$(YNL_TOOL) -o $@ -x + $(Q)$(YNL_TOOL) -o $@ -x -$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml - @$(YNL_TOOL) -i $< -o $@ +$(YNL_RST_DIR)/%.rst: $(YNL_YAML_DIR)/%.yaml $(YNL_TOOL) + $(Q)$(YNL_TOOL) -i $< -o $@ htmldocs: $(YNL_INDEX) @$(srctree)/scripts/sphinx-pre-install --version-check -- cgit v1.3.1 From 13b127d2578432e1e521310b69944c5a1b30679c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Dec 2023 13:30:00 +0100 Subject: devlink: add a command to set notification filter and use it for multicasts Currently the user listening on a socket for devlink notifications gets always all messages for all existing instances, even if he is interested only in one of those. That may cause unnecessary overhead on setups with thousands of instances present. User is currently able to narrow down the devlink objects replies to dump commands by specifying select attributes. Allow similar approach for notifications. Introduce a new devlink NOTIFY_FILTER_SET which the user passes the select attributes. Store these per-socket and use them for filtering messages during multicast send. Signed-off-by: Jiri Pirko Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/devlink.yaml | 10 +++ include/uapi/linux/devlink.h | 2 + net/devlink/devl_internal.h | 34 +++++++++- net/devlink/netlink.c | 108 +++++++++++++++++++++++++++++++ net/devlink/netlink_gen.c | 15 ++++- net/devlink/netlink_gen.h | 4 +- 6 files changed, 169 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index c3a438197964..88bfcb3c3346 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -2254,3 +2254,13 @@ operations: - bus-name - dev-name - selftests + + - + name: notify-filter-set + doc: Set notification messages socket filter. + attribute-set: devlink + do: + request: + attributes: + - bus-name + - dev-name diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index b3c8383d342d..130cae0d3e20 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -139,6 +139,8 @@ enum devlink_command { DEVLINK_CMD_SELFTESTS_GET, /* can dump */ DEVLINK_CMD_SELFTESTS_RUN, + DEVLINK_CMD_NOTIFY_FILTER_SET, + /* add new commands above here */ __DEVLINK_CMD_MAX, DEVLINK_CMD_MAX = __DEVLINK_CMD_MAX - 1 diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 84dc9628d3f2..82e0fb3bbebf 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -191,11 +191,41 @@ static inline bool devlink_nl_notify_need(struct devlink *devlink) DEVLINK_MCGRP_CONFIG); } +struct devlink_obj_desc { + struct rcu_head rcu; + const char *bus_name; + const char *dev_name; + long data[]; +}; + +static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, + struct devlink *devlink) +{ + memset(desc, 0, sizeof(*desc)); + desc->bus_name = devlink->dev->bus->name; + desc->dev_name = dev_name(devlink->dev); +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); + +static inline void devlink_nl_notify_send_desc(struct devlink *devlink, + struct sk_buff *msg, + struct devlink_obj_desc *desc) +{ + genlmsg_multicast_netns_filtered(&devlink_nl_family, + devlink_net(devlink), + msg, 0, DEVLINK_MCGRP_CONFIG, + GFP_KERNEL, + devlink_nl_notify_filter, desc); +} + static inline void devlink_nl_notify_send(struct devlink *devlink, struct sk_buff *msg) { - genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), - msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL); + struct devlink_obj_desc desc; + + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_notify_send_desc(devlink, msg, &desc); } /* Notify */ diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index fa9afe3e6d9b..3176be2585cb 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -17,6 +17,111 @@ static const struct genl_multicast_group devlink_nl_mcgrps[] = { [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME }, }; +struct devlink_nl_sock_priv { + struct devlink_obj_desc __rcu *flt; + spinlock_t flt_lock; /* Protects flt. */ +}; + +static void devlink_nl_sock_priv_init(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + + spin_lock_init(&sk_priv->flt_lock); +} + +static void devlink_nl_sock_priv_destroy(void *priv) +{ + struct devlink_nl_sock_priv *sk_priv = priv; + struct devlink_obj_desc *flt; + + flt = rcu_dereference_protected(sk_priv->flt, true); + kfree_rcu(flt, rcu); +} + +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info) +{ + struct devlink_nl_sock_priv *sk_priv; + struct nlattr **attrs = info->attrs; + struct devlink_obj_desc *flt; + size_t data_offset = 0; + size_t data_size = 0; + char *pos; + + if (attrs[DEVLINK_ATTR_BUS_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_BUS_NAME]) + 1); + if (attrs[DEVLINK_ATTR_DEV_NAME]) + data_size = size_add(data_size, + nla_len(attrs[DEVLINK_ATTR_DEV_NAME]) + 1); + + flt = kzalloc(size_add(sizeof(*flt), data_size), GFP_KERNEL); + if (!flt) + return -ENOMEM; + + pos = (char *) flt->data; + if (attrs[DEVLINK_ATTR_BUS_NAME]) { + data_offset += nla_strscpy(pos, + attrs[DEVLINK_ATTR_BUS_NAME], + data_size) + 1; + flt->bus_name = pos; + pos += data_offset; + } + if (attrs[DEVLINK_ATTR_DEV_NAME]) { + nla_strscpy(pos, attrs[DEVLINK_ATTR_DEV_NAME], + data_size - data_offset); + flt->dev_name = pos; + } + + /* Don't attach empty filter. */ + if (!flt->bus_name && !flt->dev_name) { + kfree(flt); + flt = NULL; + } + + sk_priv = genl_sk_priv_get(&devlink_nl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) { + kfree(flt); + return PTR_ERR(sk_priv); + } + spin_lock(&sk_priv->flt_lock); + flt = rcu_replace_pointer(sk_priv->flt, flt, + lockdep_is_held(&sk_priv->flt_lock)); + spin_unlock(&sk_priv->flt_lock); + kfree_rcu(flt, rcu); + return 0; +} + +static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, + const struct devlink_obj_desc *flt) +{ + if (desc->bus_name && flt->bus_name && + strcmp(desc->bus_name, flt->bus_name)) + return false; + if (desc->dev_name && flt->dev_name && + strcmp(desc->dev_name, flt->dev_name)) + return false; + return true; +} + +int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data) +{ + struct devlink_obj_desc *desc = data; + struct devlink_nl_sock_priv *sk_priv; + struct devlink_obj_desc *flt; + int ret = 0; + + rcu_read_lock(); + sk_priv = __genl_sk_priv_get(&devlink_nl_family, dsk); + if (!IS_ERR_OR_NULL(sk_priv)) { + flt = rcu_dereference(sk_priv->flt); + if (flt) + ret = !devlink_obj_desc_match(desc, flt); + } + rcu_read_unlock(); + return ret; +} + int devlink_nl_put_nested_handle(struct sk_buff *msg, struct net *net, struct devlink *devlink, int attrtype) { @@ -256,4 +361,7 @@ struct genl_family devlink_nl_family __ro_after_init = { .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1, .mcgrps = devlink_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps), + .sock_priv_size = sizeof(struct devlink_nl_sock_priv), + .sock_priv_init = devlink_nl_sock_priv_init, + .sock_priv_destroy = devlink_nl_sock_priv_destroy, }; diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 95f9b4350ab7..1cb0e05305d2 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -560,8 +560,14 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; +/* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, +}; + /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[73] = { +const struct genl_split_ops devlink_nl_ops[74] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -1233,4 +1239,11 @@ const struct genl_split_ops devlink_nl_ops[73] = { .maxattr = DEVLINK_ATTR_SELFTESTS, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, + { + .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, + .doit = devlink_nl_notify_filter_set_doit, + .policy = devlink_notify_filter_set_nl_policy, + .maxattr = DEVLINK_ATTR_DEV_NAME, + .flags = GENL_CMD_CAP_DO, + }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 02f3c0bfae0e..8f2bd50ddf5e 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -16,7 +16,7 @@ extern const struct nla_policy devlink_dl_port_function_nl_policy[DEVLINK_PORT_F extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[73]; +extern const struct genl_split_ops devlink_nl_ops[74]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -142,5 +142,7 @@ int devlink_nl_selftests_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_selftests_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb); int devlink_nl_selftests_run_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, + struct genl_info *info); #endif /* _LINUX_DEVLINK_GEN_H */ -- cgit v1.3.1 From ded6f77c05b113001d449cf2cc810e090f20ec4a Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 16 Dec 2023 13:30:01 +0100 Subject: devlink: extend multicast filtering by port index Expose the previously introduced notification multicast messages filtering infrastructure and allow the user to select messages using port index. Signed-off-by: Jiri Pirko Signed-off-by: Paolo Abeni --- Documentation/netlink/specs/devlink.yaml | 1 + net/devlink/devl_internal.h | 9 +++++++++ net/devlink/health.c | 6 +++++- net/devlink/netlink.c | 10 +++++++++- net/devlink/netlink_gen.c | 5 +++-- net/devlink/port.c | 5 ++++- 6 files changed, 31 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/devlink.yaml b/Documentation/netlink/specs/devlink.yaml index 88bfcb3c3346..cf6eaa0da821 100644 --- a/Documentation/netlink/specs/devlink.yaml +++ b/Documentation/netlink/specs/devlink.yaml @@ -2264,3 +2264,4 @@ operations: attributes: - bus-name - dev-name + - port-index diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 82e0fb3bbebf..c7a8e13f917c 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -195,6 +195,8 @@ struct devlink_obj_desc { struct rcu_head rcu; const char *bus_name; const char *dev_name; + unsigned int port_index; + bool port_index_valid; long data[]; }; @@ -206,6 +208,13 @@ static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, desc->dev_name = dev_name(devlink->dev); } +static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, + struct devlink_port *devlink_port) +{ + desc->port_index = devlink_port->index; + desc->port_index_valid = true; +} + int devlink_nl_notify_filter(struct sock *dsk, struct sk_buff *skb, void *data); static inline void devlink_nl_notify_send_desc(struct devlink *devlink, diff --git a/net/devlink/health.c b/net/devlink/health.c index 1d59ec0202f6..acb8c0e174bb 100644 --- a/net/devlink/health.c +++ b/net/devlink/health.c @@ -490,6 +490,7 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, enum devlink_command cmd) { struct devlink *devlink = reporter->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; @@ -509,7 +510,10 @@ static void devlink_recover_notify(struct devlink_health_reporter *reporter, return; } - devlink_nl_notify_send(devlink, msg); + devlink_nl_obj_desc_init(&desc, devlink); + if (reporter->devlink_port) + devlink_nl_obj_desc_port_set(&desc, reporter->devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } void diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 3176be2585cb..499885c8b9ca 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -73,8 +73,13 @@ int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, flt->dev_name = pos; } + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { + flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); + flt->port_index_valid = true; + } + /* Don't attach empty filter. */ - if (!flt->bus_name && !flt->dev_name) { + if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { kfree(flt); flt = NULL; } @@ -101,6 +106,9 @@ static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, if (desc->dev_name && flt->dev_name && strcmp(desc->dev_name, flt->dev_name)) return false; + if (desc->port_index_valid && flt->port_index_valid && + desc->port_index != flt->port_index) + return false; return true; } diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index 1cb0e05305d2..c81cf2dd154f 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -561,9 +561,10 @@ static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELF }; /* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ -static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* Ops table for devlink */ @@ -1243,7 +1244,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, .doit = devlink_nl_notify_filter_set_doit, .policy = devlink_notify_filter_set_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_PORT_INDEX, .flags = GENL_CMD_CAP_DO, }, }; diff --git a/net/devlink/port.c b/net/devlink/port.c index 758df3000a1b..62e54e152ecf 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -507,6 +507,7 @@ static void devlink_port_notify(struct devlink_port *devlink_port, enum devlink_command cmd) { struct devlink *devlink = devlink_port->devlink; + struct devlink_obj_desc desc; struct sk_buff *msg; int err; @@ -525,7 +526,9 @@ static void devlink_port_notify(struct devlink_port *devlink_port, return; } - devlink_nl_notify_send(devlink, msg); + devlink_nl_obj_desc_init(&desc, devlink); + devlink_nl_obj_desc_port_set(&desc, devlink_port); + devlink_nl_notify_send_desc(devlink, msg, &desc); } static void devlink_ports_notify(struct devlink *devlink, -- cgit v1.3.1 From 2ab0edb505faa9ac90dee1732571390f074e8113 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:38 +0100 Subject: net: ethtool: Allow passing a phy index for some commands Some netlink commands are target towards ethernet PHYs, to control some of their features. As there's several such commands, add the ability to pass a PHY index in the ethnl request, which will populate the generic ethnl_req_info with the relevant phydev when the command targets a PHY. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 7 +++++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 24 ++++++++++++++++++++++++ net/ethtool/netlink.h | 7 +++++-- 4 files changed, 37 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d583d9abf2f8..3ca6c21e74af 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -57,6 +57,7 @@ Structure of this header is ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex ``ETHTOOL_A_HEADER_DEV_NAME`` string device name ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests + ``ETHTOOL_A_HEADER_PHY_INDEX`` u32 phy device index ============================== ====== ============================= ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the @@ -81,6 +82,12 @@ the behaviour is backward compatible, i.e. requests from old clients not aware of the flag should be interpreted the way the client expects. A client must not set flags it does not understand. +``ETHTOOL_A_HEADER_PHY_INDEX`` identify the ethernet PHY the message relates to. +As there are numerous commands that are related to PHY configuration, and because +we can have more than one PHY on the link, the PHY index can be passed in the +request for the commands that needs it. It is however not mandatory, and if it +is not passed for commands that target a PHY, the net_device.phydev pointer +is used, as a fallback that keeps the legacy behaviour. Bit sets ======== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3f89074aa06c..422e8cfdd98c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -133,6 +133,7 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ + ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index fe3553f60bf3..1c26766ce996 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,6 +4,7 @@ #include #include #include "netlink.h" +#include static struct genl_family ethtool_genl_family; @@ -20,6 +21,7 @@ const struct nla_policy ethnl_header_policy[] = { .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_BASIC), + [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; const struct nla_policy ethnl_header_policy_stats[] = { @@ -28,6 +30,7 @@ const struct nla_policy ethnl_header_policy_stats[] = { .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_STATS), + [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; int ethnl_ops_begin(struct net_device *dev) @@ -91,6 +94,7 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, { struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; const struct nlattr *devname_attr; + struct phy_device *phydev = NULL; struct net_device *dev = NULL; u32 flags = 0; int ret; @@ -145,6 +149,26 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } + if (dev) { + if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { + u32 phy_index = nla_get_u32(tb[ETHTOOL_A_HEADER_PHY_INDEX]); + + phydev = phy_link_topo_get_phy(&dev->link_topo, + phy_index); + if (!phydev) { + NL_SET_ERR_MSG_ATTR(extack, header, + "no phy matches phy index"); + return -EINVAL; + } + } else { + /* If we need a PHY but no phy index is specified, fallback + * to dev->phydev + */ + phydev = dev->phydev; + } + } + + req_info->phydev = phydev; req_info->dev = dev; req_info->flags = flags; return 0; diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1..def84e2def9e 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -250,6 +250,7 @@ static inline unsigned int ethnl_reply_header_size(void) * @dev: network device the request is for (may be null) * @dev_tracker: refcount tracker for @dev reference * @flags: request flags common for all request types + * @phydev: phy_device connected to @dev this request is for (may be null) * * This is a common base for request specific structures holding data from * parsed userspace request. These always embed struct ethnl_req_info at @@ -259,6 +260,7 @@ struct ethnl_req_info { struct net_device *dev; netdevice_tracker dev_tracker; u32 flags; + struct phy_device *phydev; }; static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) @@ -395,9 +397,10 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; +extern const struct ethnl_request_ops ethnl_phy_request_ops; -extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; -extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; +extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_PHY_INDEX + 1]; +extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1]; extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1]; extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; -- cgit v1.3.1 From c29451aefcb42359905d18678de38e52eccb3bb5 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:39 +0100 Subject: netlink: specs: add phy-index as a header parameter Update the spec to take the newly introduced phy-index as a generic request parameter. Signed-off-by: Maxime Chevallier Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 3 +++ 1 file changed, 3 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 197208f419dc..bb6e1dc6d1c5 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -30,6 +30,9 @@ attribute-sets: - name: flags type: u32 + - + name: phy-index + type: u32 - name: bitset-bit -- cgit v1.3.1 From 63d5eaf35ac36cad00cfb3809d794ef0078c822b Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:40 +0100 Subject: net: ethtool: Introduce a command to list PHYs on an interface As we have the ability to track the PHYs connected to a net_device through the link_topology, we can expose this list to userspace. This allows userspace to use these identifiers for phy-specific commands and take the decision of which PHY to target by knowing the link topology. Add PHY_GET and PHY_DUMP, which can be a filtered DUMP operation to list devices on only one interface. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 44 ++++ include/uapi/linux/ethtool_netlink.h | 29 +++ net/ethtool/Makefile | 2 +- net/ethtool/netlink.c | 9 + net/ethtool/netlink.h | 5 + net/ethtool/phy.c | 306 +++++++++++++++++++++++++++ 6 files changed, 394 insertions(+), 1 deletion(-) create mode 100644 net/ethtool/phy.c (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 3ca6c21e74af..97ff787a7dd8 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -2011,6 +2011,49 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg +PHY_GET +======= + +Retrieve information about a given Ethernet PHY sitting on the link. As there +can be more than one PHY, the DUMP operation can be used to list the PHYs +present on a given interface, by passing an interface index or name in +the dump request + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PHY_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_PHY_HEADER`` nested request header + ``ETHTOOL_A_PHY_INDEX`` u32 the phy's unique index, that can + be used for phy-specific requests + ``ETHTOOL_A_PHY_DRVNAME`` string the phy driver name + ``ETHTOOL_A_PHY_NAME`` string the phy device name + ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` u32 the type of device this phy is + connected to + ``ETHTOOL_A_PHY_UPSTREAM_PHY`` nested if the phy is connected to another + phy, this nest contains info on + that connection + ``ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME`` string if the phy controls an sfp bus, + the name of the sfp bus + ``ETHTOOL_A_PHY_ID`` u32 the phy id if the phy is C22 + ===================================== ====== ========================== + +When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is +another PHY. Information on the parent PHY will be set in the +``ETHTOOL_A_PHY_UPSTREAM_PHY`` nest, which has the following structure : + + =================================== ====== ========================== + ``ETHTOOL_A_PHY_UPSTREAM_INDEX`` u32 the PHY index of the upstream PHY + ``ETHTOOL_A_PHY_UPSTREAM_SFP_NAME`` string if this PHY is connected to it's + parent PHY through an SFP bus, the + name of this sfp bus + =================================== ====== ========================== + Request translation =================== @@ -2117,4 +2160,5 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` + n/a ``ETHTOOL_MSG_PHY_GET`` =================================== ===================================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 422e8cfdd98c..00cd7ad16709 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,8 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -977,6 +980,32 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +enum { + ETHTOOL_A_PHY_UPSTREAM_UNSPEC, + ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_UPSTREAM_CNT, + ETHTOOL_A_PHY_UPSTREAM_MAX = (__ETHTOOL_A_PHY_UPSTREAM_CNT - 1) +}; + +enum { + ETHTOOL_A_PHY_UNSPEC, + ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PHY_INDEX, /* u32 */ + ETHTOOL_A_PHY_DRVNAME, /* string */ + ETHTOOL_A_PHY_NAME, /* string */ + ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u8 */ + ETHTOOL_A_PHY_UPSTREAM, /* nest - _A_PHY_UPSTREAM_* */ + ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ + ETHTOOL_A_PHY_ID, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_PHY_CNT, + ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 504f954a1b28..0ccd0e9afd3f 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o + module.o pse-pd.o plca.o mm.o phy.o diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 1c26766ce996..92b0dd8ca046 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -1153,6 +1153,15 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_PHY_GET, + .doit = ethnl_phy_doit, + .start = ethnl_phy_start, + .dumpit = ethnl_phy_dumpit, + .done = ethnl_phy_done, + .policy = ethnl_phy_get_policy, + .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index def84e2def9e..5e6a43e35a09 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -444,6 +444,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; +extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); @@ -451,6 +452,10 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_phy_start(struct netlink_callback *cb); +int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); +int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_phy_done(struct netlink_callback *cb); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c new file mode 100644 index 000000000000..5add2840aaeb --- /dev/null +++ b/net/ethtool/phy.c @@ -0,0 +1,306 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2023 Bootlin + * + */ +#include "common.h" +#include "netlink.h" + +#include +#include +#include + +struct phy_req_info { + struct ethnl_req_info base; + struct phy_device_node pdn; +}; + +#define PHY_REQINFO(__req_base) \ + container_of(__req_base, struct phy_req_info, base) + +const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = { + [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), +}; + +/* Caller holds rtnl */ +static ssize_t +ethnl_phy_reply_size(const struct ethnl_req_info *req_base, + struct netlink_ext_ack *extack) +{ + struct phy_link_topology *topo; + struct phy_device_node *pdn; + struct phy_device *phydev; + unsigned long index; + size_t size; + + ASSERT_RTNL(); + + topo = &req_base->dev->link_topo; + + size = nla_total_size(0); + + xa_for_each(&topo->phys, index, pdn) { + phydev = pdn->phy; + + /* ETHTOOL_A_PHY_INDEX */ + size += nla_total_size(sizeof(u32)); + + /* ETHTOOL_A_DRVNAME */ + size += nla_total_size(strlen(phydev->drv->name) + 1); + + /* ETHTOOL_A_NAME */ + size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1); + + /* ETHTOOL_A_PHY_UPSTREAM_TYPE */ + size += nla_total_size(sizeof(u8)); + + /* ETHTOOL_A_PHY_ID */ + size += nla_total_size(sizeof(u32)); + + if (phy_on_sfp(phydev)) { + const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus); + + /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */ + if (upstream_sfp_name) + size += nla_total_size(strlen(upstream_sfp_name) + 1); + + /* ETHTOOL_A_PHY_UPSTREAM_INDEX */ + size += nla_total_size(sizeof(u32)); + } + + /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */ + if (phydev->sfp_bus) { + const char *sfp_name = sfp_get_name(phydev->sfp_bus); + + if (sfp_name) + size += nla_total_size(strlen(sfp_name) + 1); + } + } + + return size; +} + +static int +ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb) +{ + struct phy_req_info *req_info = PHY_REQINFO(req_base); + struct phy_device_node *pdn = &req_info->pdn; + struct phy_device *phydev = pdn->phy; + enum phy_upstream ptype; + struct nlattr *nest; + + ptype = pdn->upstream_type; + + if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) || + nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name) || + nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) || + nla_put_u8(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype) || + nla_put_u32(skb, ETHTOOL_A_PHY_ID, phydev->phy_id)) + return -EMSGSIZE; + + if (ptype == PHY_UPSTREAM_PHY) { + struct phy_device *upstream = pdn->upstream.phydev; + const char *sfp_upstream_name; + + nest = nla_nest_start(skb, ETHTOOL_A_PHY_UPSTREAM); + if (!nest) + return -EMSGSIZE; + + /* Parent index */ + if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex)) + return -EMSGSIZE; + + if (pdn->parent_sfp_bus) { + sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus); + if (sfp_upstream_name && nla_put_string(skb, + ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, + sfp_upstream_name)) + return -EMSGSIZE; + } + + nla_nest_end(skb, nest); + } + + if (phydev->sfp_bus) { + const char *sfp_name = sfp_get_name(phydev->sfp_bus); + + if (sfp_name && + nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, + sfp_name)) + return -EMSGSIZE; + } + + return 0; +} + +static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, + struct nlattr **tb) +{ + struct phy_link_topology *topo = &req_base->dev->link_topo; + struct phy_req_info *req_info = PHY_REQINFO(req_base); + struct phy_device_node *pdn; + + if (!req_base->phydev) + return 0; + + pdn = xa_load(&topo->phys, req_base->phydev->phyindex); + memcpy(&req_info->pdn, pdn, sizeof(*pdn)); + + return 0; +} + +int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) +{ + struct phy_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct sk_buff *rskb; + void *reply_payload; + int reply_len; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info.base, + tb[ETHTOOL_A_PHY_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + + rtnl_lock(); + + ret = ethnl_phy_parse_request(&req_info.base, tb); + if (ret < 0) + goto err_unlock_rtnl; + + /* No PHY, return early */ + if (!req_info.pdn.phy) + goto err_unlock_rtnl; + + ret = ethnl_phy_reply_size(&req_info.base, info->extack); + if (ret < 0) + goto err_unlock_rtnl; + reply_len = ret + ethnl_reply_header_size(); + + rskb = ethnl_reply_init(reply_len, req_info.base.dev, + ETHTOOL_MSG_PHY_GET_REPLY, + ETHTOOL_A_PHY_HEADER, + info, &reply_payload); + if (!rskb) { + ret = -ENOMEM; + goto err_unlock_rtnl; + } + + ret = ethnl_phy_fill_reply(&req_info.base, rskb); + if (ret) + goto err_free_msg; + + rtnl_unlock(); + ethnl_parse_header_dev_put(&req_info.base); + genlmsg_end(rskb, reply_payload); + + return genlmsg_reply(rskb, info); + +err_free_msg: + nlmsg_free(rskb); +err_unlock_rtnl: + rtnl_unlock(); + ethnl_parse_header_dev_put(&req_info.base); + return ret; +} + +struct ethnl_phy_dump_ctx { + struct phy_req_info *phy_req_info; +}; + +int ethnl_phy_start(struct netlink_callback *cb) +{ + const struct genl_dumpit_info *info = genl_dumpit_info(cb); + struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; + struct nlattr **tb = info->info.attrs; + int ret; + + BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); + + ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL); + if (!ctx->phy_req_info) + return -ENOMEM; + + ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base, + tb[ETHTOOL_A_PHY_HEADER], + sock_net(cb->skb->sk), cb->extack, + false); + return ret; +} + +int ethnl_phy_done(struct netlink_callback *cb) +{ + struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; + + kfree(ctx->phy_req_info); + + return 0; +} + +static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev, + struct netlink_callback *cb) +{ + struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; + struct phy_req_info *pri = ctx->phy_req_info; + struct phy_device_node *pdn; + unsigned long index = 1; + int ret = 0; + void *ehdr; + + pri->base.dev = dev; + + xa_for_each(&dev->link_topo.phys, index, pdn) { + ehdr = ethnl_dump_put(skb, cb, + ETHTOOL_MSG_PHY_GET_REPLY); + if (!ehdr) { + ret = -EMSGSIZE; + break; + } + + ret = ethnl_fill_reply_header(skb, dev, + ETHTOOL_A_PHY_HEADER); + if (ret < 0) { + genlmsg_cancel(skb, ehdr); + break; + } + + memcpy(&pri->pdn, pdn, sizeof(*pdn)); + ret = ethnl_phy_fill_reply(&pri->base, skb); + + genlmsg_end(skb, ehdr); + } + + return ret; +} + +int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; + struct net *net = sock_net(skb->sk); + unsigned long ifindex = 1; + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + + if (ctx->phy_req_info->base.dev) { + ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb); + ethnl_parse_header_dev_put(&ctx->phy_req_info->base); + ctx->phy_req_info->base.dev = NULL; + } else { + for_each_netdev_dump(net, dev, ifindex) { + ret = ethnl_phy_dump_one_dev(skb, dev, cb); + if (ret) + break; + } + } + rtnl_unlock(); + + if (ret == -EMSGSIZE && skb->len) + return skb->len; + return ret; +} + -- cgit v1.3.1 From 95132a018f00f5dad38bdcfd4180d1af955d46f6 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:41 +0100 Subject: netlink: specs: add ethnl PHY_GET command set The PHY_GET command, supporting both DUMP and GET operations, is used to retrieve the list of PHYs connected to a netdevice, and get topology information to know where exactly it sits on the physical link. Add the netlink specs corresponding to that command. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- Documentation/netlink/specs/ethtool.yaml | 65 ++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index bb6e1dc6d1c5..7f6fb1f61dd4 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -16,6 +16,11 @@ definitions: name: stringset type: enum entries: [] + - + name: phy-upstream-type + enum-name: + type: enum + entries: [ mac, phy ] attribute-sets: - @@ -945,6 +950,45 @@ attribute-sets: - name: burst-tmr type: u32 + - + name: phy-upstream + attributes: + - + name: index + type: u32 + - + name: sfp-name + type: string + - + name: phy + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: index + type: u32 + - + name: drvname + type: string + - + name: name + type: string + - + name: upstream-type + type: u8 + enum: phy-upstream-type + - + name: upstream + type: nest + nested-attributes: phy-upstream + - + name: downstream-sfp-name + type: string + - + name: id + type: u32 operations: enum-model: directional @@ -1696,3 +1740,24 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get + - + name: phy-get + doc: Get PHY devices attached to an interface + + attribute-set: phy + + do: &phy-get-op + request: + attributes: + - header + reply: + attributes: + - header + - index + - drvname + - name + - upstream-type + - upstream + - downstream-sfp-name + - id + dump: *phy-get-op -- cgit v1.3.1 From 32bb4515e34469975abc936deb0a116c4a445817 Mon Sep 17 00:00:00 2001 From: Maxime Chevallier Date: Thu, 21 Dec 2023 19:00:46 +0100 Subject: Documentation: networking: document phy_link_topology The newly introduced phy_link_topology tracks all ethernet PHYs that are attached to a netdevice. Document the base principle, internal and external APIs. As the phy_link_topology is expected to be extended, this documentation will hold any further improvements and additions made relative to topology handling. Signed-off-by: Maxime Chevallier Signed-off-by: David S. Miller --- Documentation/networking/index.rst | 1 + Documentation/networking/phy-link-topology.rst | 121 +++++++++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 Documentation/networking/phy-link-topology.rst (limited to 'Documentation') diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index 69f3d6dcd9fd..a2c45a75a4a6 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -88,6 +88,7 @@ Contents: operstates packet_mmap phonet + phy-link-topology pktgen plip ppp_generic diff --git a/Documentation/networking/phy-link-topology.rst b/Documentation/networking/phy-link-topology.rst new file mode 100644 index 000000000000..1fd8e904ef4b --- /dev/null +++ b/Documentation/networking/phy-link-topology.rst @@ -0,0 +1,121 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================= +PHY link topology +================= + +Overview +======== + +The PHY link topology representation in the networking stack aims at representing +the hardware layout for any given Ethernet link. + +An Ethernet Interface from userspace's point of view is nothing but a +:c:type:`struct net_device `, which exposes configuration options +through the legacy ioctls and the ethool netlink commands. The base assumption +when designing these configuration channels were that the link looked +something like this :: + + +-----------------------+ +----------+ +--------------+ + | Ethernet Controller / | | Ethernet | | Connector / | + | MAC | ------ | PHY | ---- | Port | ---... to LP + +-----------------------+ +----------+ +--------------+ + struct net_device struct phy_device + +Commands that needs to configure the PHY will go through the net_device.phydev +field to reach the PHY and perform the relevant configuration. + +This assumption falls apart in more complex topologies that can arise when, +for example, using SFP transceivers (although that's not the only specific case). + +Here, we have 2 basic scenarios. Either the MAC is able to output a serialized +interface, that can directly be fed to an SFP cage, such as SGMII, 1000BaseX, +10GBaseR, etc. + +The link topology then looks like this (when an SFP module is inserted) :: + + +-----+ SGMII +------------+ + | MAC | ------- | SFP Module | + +-----+ +------------+ + +Knowing that some modules embed a PHY, the actual link is more like :: + + +-----+ SGMII +--------------+ + | MAC | -------- | PHY (on SFP) | + +-----+ +--------------+ + +In this case, the SFP PHY is handled by phylib, and registered by phylink through +its SFP upstream ops. + +Now some Ethernet controllers aren't able to output a serialized interface, so +we can't directly connect them to an SFP cage. However, some PHYs can be used +as media-converters, to translate the non-serialized MAC MII interface to a +serialized MII interface fed to the SFP :: + + +-----+ RGMII +-----------------------+ SGMII +--------------+ + | MAC | ------- | PHY (media converter) | ------- | PHY (on SFP) | + +-----+ +-----------------------+ +--------------+ + +This is where the model of having a single net_device.phydev pointer shows its +limitations, as we now have 2 PHYs on the link. + +The phy_link topology framework aims at providing a way to keep track of every +PHY on the link, for use by both kernel drivers and subsystems, but also to +report the topology to userspace, allowing to target individual PHYs in configuration +commands. + +API +=== + +The :c:type:`struct phy_link_topology ` is a per-netdevice +resource, that gets initialized at netdevice creation. Once it's initialized, +it is then possible to register PHYs to the topology through : + +:c:func:`phy_link_topo_add_phy` + +Besides registering the PHY to the topology, this call will also assign a unique +index to the PHY, which can then be reported to userspace to refer to this PHY +(akin to the ifindex). This index is a u32, ranging from 1 to U32_MAX. The value +0 is reserved to indicate the PHY doesn't belong to any topology yet. + +The PHY can then be removed from the topology through + +:c:func:`phy_link_topo_del_phy` + +These function are already hooked into the phylib subsystem, so all PHYs that +are linked to a net_device through :c:func:`phy_attach_direct` will automatically +join the netdev's topology. + +PHYs that are on a SFP module will also be automatically registered IF the SFP +upstream is phylink (so, no media-converter). + +PHY drivers that can be used as SFP upstream need to call :c:func:`phy_sfp_attach_phy` +and :c:func:`phy_sfp_detach_phy`, which can be used as a +.attach_phy / .detach_phy implementation for the +:c:type:`struct sfp_upstream_ops `. + +UAPI +==== + +There exist a set of netlink commands to query the link topology from userspace, +see ``Documentation/networking/ethtool-netlink.rst``. + +The whole point of having a topology representation is to assign the phyindex +field in :c:type:`struct phy_device `. This index is reported to +userspace using the ``ETHTOOL_MSG_PHY_GET`` ethtnl command. Performing a DUMP operation +will result in all PHYs from all net_device being listed. The DUMP command +accepts either a ``ETHTOOL_A_HEADER_DEV_INDEX`` or ``ETHTOOL_A_HEADER_DEV_NAME`` +to be passed in the request to filter the DUMP to a single net_device. + +The retrieved index can then be passed as a request parameter using the +``ETHTOOL_A_HEADER_PHY_INDEX`` field in the following ethnl commands : + +* ``ETHTOOL_MSG_STRSET_GET`` to get the stats string set from a given PHY +* ``ETHTOOL_MSG_CABLE_TEST_ACT`` and ``ETHTOOL_MSG_CABLE_TEST_ACT``, to perform + cable testing on a given PHY on the link (most likely the outermost PHY) +* ``ETHTOOL_MSG_PSE_SET`` and ``ETHTOOL_MSG_PSE_GET`` for PHY-controlled PoE and PSE settings +* ``ETHTOOL_MSG_PLCA_GET_CFG``, ``ETHTOOL_MSG_PLCA_SET_CFG`` and ``ETHTOOL_MSG_PLCA_GET_STATUS`` + to set the PLCA (Physical Layer Collision Avoidance) parameters + +Note that the PHY index can be passed to other requests, which will silently +ignore it if present and irrelevant. -- cgit v1.3.1 From 993498e537af9260e697219ce41b41b22b6199cc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 21 Dec 2023 14:07:47 +0000 Subject: net-device: move gso_partial_features to net_device_read_tx dev->gso_partial_features is read from tx fast path for GSO packets. Move it to appropriate section to avoid a cache line miss. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: David Ahern Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 3 ++- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 6cab1b797739..2dd8d8f20da2 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -38,7 +38,7 @@ netdev_features_t wanted_features netdev_features_t vlan_features netdev_features_t hw_enc_features - - netif_skb_features netdev_features_t mpls_features -netdev_features_t gso_partial_features +netdev_features_t gso_partial_features read_mostly gso_features_check unsigned_int min_mtu unsigned_int max_mtu unsigned_short type diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5baa5517f533..d59db9adcc96 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2115,6 +2115,7 @@ struct net_device { const struct net_device_ops *netdev_ops; const struct header_ops *header_ops; struct netdev_queue *_tx; + netdev_features_t gso_partial_features; unsigned int real_num_tx_queues; unsigned int gso_max_size; unsigned int gso_ipv4_max_size; @@ -2211,7 +2212,6 @@ struct net_device { netdev_features_t vlan_features; netdev_features_t hw_enc_features; netdev_features_t mpls_features; - netdev_features_t gso_partial_features; unsigned int min_mtu; unsigned int max_mtu; diff --git a/net/core/dev.c b/net/core/dev.c index df04cbf77551..31588a50b757 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11629,6 +11629,7 @@ static void __init net_dev_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_ipv4_max_size); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_max_segs); + CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, gso_partial_features); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, num_tc); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, mtu); CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, needed_headroom); @@ -11642,7 +11643,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_tx, tcx_egress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 152); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_tx, 160); /* TXRX read-mostly hotpath */ CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_txrx, flags); -- cgit v1.3.1 From 7df54188a897ff656e237239f2b02a8f70183333 Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 22 Dec 2023 14:36:28 +0100 Subject: Documentation: add pyyaml to requirements.txt Commit f061c9f7d058 ("Documentation: Document each netlink family") added a new Python script that is invoked during 'make htmldocs' and which reads the netlink YAML spec files. Using the virtualenv from scripts/sphinx-pre-install, we get this new error wen running 'make htmldocs': Traceback (most recent call last): File "./tools/net/ynl/ynl-gen-rst.py", line 26, in import yaml ModuleNotFoundError: No module named 'yaml' make[2]: *** [Documentation/Makefile:112: Documentation/networking/netlink_spec/rt_link.rst] Error 1 make[1]: *** [Makefile:1708: htmldocs] Error 2 Fix this by adding 'pyyaml' to requirements.txt. Note: This was somehow present in the original patch submission: I'm not sure why the pyyaml requirement disappeared in the meantime. Fixes: f061c9f7d058 ("Documentation: Document each netlink family") Cc: Breno Leitao Cc: Jakub Kicinski Cc: David S. Miller Cc: Jonathan Corbet Signed-off-by: Vegard Nossum Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- Documentation/sphinx/requirements.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/sphinx/requirements.txt b/Documentation/sphinx/requirements.txt index 335b53df35e2..a8a1aff6445e 100644 --- a/Documentation/sphinx/requirements.txt +++ b/Documentation/sphinx/requirements.txt @@ -1,3 +1,4 @@ # jinja2>=3.1 is not compatible with Sphinx<4.0 jinja2<3.1 Sphinx==2.4.4 +pyyaml -- cgit v1.3.1 From ebdc193b2ce209bfc1ebec2f777cd7bac00b547c Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Sat, 23 Dec 2023 05:39:53 -0800 Subject: octeon_ep_vf: Add driver framework and device initialization Add driver framework and device setup and initialization for Octeon PCI Endpoint NIC VF. Add implementation to load module, initialize, register network device, cleanup and unload module. Signed-off-by: Shinas Rasheed Signed-off-by: David S. Miller --- .../networking/device_drivers/ethernet/index.rst | 1 + .../ethernet/marvell/octeon_ep_vf.rst | 24 + drivers/net/ethernet/marvell/Kconfig | 1 + drivers/net/ethernet/marvell/Makefile | 1 + drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig | 19 + drivers/net/ethernet/marvell/octeon_ep_vf/Makefile | 9 + .../ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c | 157 ++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c | 158 ++++++ .../marvell/octeon_ep_vf/octep_vf_config.h | 160 +++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 528 +++++++++++++++++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_main.h | 336 +++++++++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c | 96 ++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h | 25 + .../marvell/octeon_ep_vf/octep_vf_regs_cn9k.h | 154 ++++++ .../marvell/octeon_ep_vf/octep_vf_regs_cnxk.h | 162 +++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_rx.c | 42 ++ .../ethernet/marvell/octeon_ep_vf/octep_vf_rx.h | 224 +++++++++ .../ethernet/marvell/octeon_ep_vf/octep_vf_tx.c | 43 ++ .../ethernet/marvell/octeon_ep_vf/octep_vf_tx.h | 276 +++++++++++ 19 files changed, 2416 insertions(+) create mode 100644 Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/Makefile create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c create mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 43de285b8a92..6932d8c043c2 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -42,6 +42,7 @@ Contents: intel/ice marvell/octeontx2 marvell/octeon_ep + marvell/octeon_ep_vf mellanox/mlx5/index microsoft/netvsc neterion/s2io diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst new file mode 100644 index 000000000000..603133d0b92f --- /dev/null +++ b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst @@ -0,0 +1,24 @@ +.. SPDX-License-Identifier: GPL-2.0+ + +======================================================================= +Linux kernel networking driver for Marvell's Octeon PCI Endpoint NIC VF +======================================================================= + +Network driver for Marvell's Octeon PCI EndPoint NIC VF. +Copyright (c) 2020 Marvell International Ltd. + +Overview +======== +This driver implements networking functionality of Marvell's Octeon PCI +EndPoint NIC VF. + +Supported Devices +================= +Currently, this driver support following devices: + * Network controller: Cavium, Inc. Device b203 + * Network controller: Cavium, Inc. Device b403 + * Network controller: Cavium, Inc. Device b103 + * Network controller: Cavium, Inc. Device b903 + * Network controller: Cavium, Inc. Device ba03 + * Network controller: Cavium, Inc. Device bc03 + * Network controller: Cavium, Inc. Device bd03 diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 884d64114bff..837295fecd17 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -180,6 +180,7 @@ config SKY2_DEBUG source "drivers/net/ethernet/marvell/octeontx2/Kconfig" source "drivers/net/ethernet/marvell/octeon_ep/Kconfig" +source "drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig" source "drivers/net/ethernet/marvell/prestera/Kconfig" endif # NET_VENDOR_MARVELL diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index ceba4aa4f026..a399defe25fd 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -12,5 +12,6 @@ obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SKY2) += sky2.o obj-y += octeon_ep/ +obj-y += octeon_ep_vf/ obj-y += octeontx2/ obj-y += prestera/ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig b/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig new file mode 100644 index 000000000000..dbd1267bda0c --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Marvell's Octeon PCI Endpoint NIC VF Driver Configuration +# + +config OCTEON_EP_VF + tristate "Marvell Octeon PCI Endpoint NIC VF Driver" + depends on 64BIT + depends on PCI + help + This driver supports networking functionality of Marvell's + Octeon PCI Endpoint NIC VF. + + To know the list of devices supported by this driver, refer + documentation in + . + + To compile this drivers as a module, choose M here. Name of the + module is octeon_ep_vf. diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile b/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile new file mode 100644 index 000000000000..694eb9b46e99 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Network driver for Marvell's Octeon PCI Endpoint NIC VF +# + +obj-$(CONFIG_OCTEON_EP_VF) += octeon_ep_vf.o + +octeon_ep_vf-y := octep_vf_main.o octep_vf_cn9k.o octep_vf_cnxk.o \ + octep_vf_tx.o octep_vf_rx.o octep_vf_mbox.o diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c new file mode 100644 index 000000000000..c24ef2265205 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include +#include + +#include "octep_vf_config.h" +#include "octep_vf_main.h" +#include "octep_vf_regs_cn9k.h" + +/* Reset all hardware Tx/Rx queues */ +static void octep_vf_reset_io_queues_cn93(struct octep_vf_device *oct) +{ +} + +/* Initialize configuration limits and initial active config */ +static void octep_vf_init_config_cn93_vf(struct octep_vf_device *oct) +{ + struct octep_vf_config *conf = oct->conf; + u64 reg_val; + + reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(0)); + conf->ring_cfg.max_io_rings = (reg_val >> CN93_VF_R_IN_CTL_RPVF_POS) & + CN93_VF_R_IN_CTL_RPVF_MASK; + conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings; + + conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR; + conf->iq.db_min = OCTEP_VF_DB_MIN; + conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD; + + conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings; +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_vf_setup_iq_regs_cn93(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Setup registers for a VF mailbox */ +static void octep_vf_setup_mbox_regs_cn93(struct octep_vf_device *oct, int q_no) +{ +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_vf_ioq_intr_handler_cn93(void *data) +{ + return IRQ_HANDLED; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_vf_reinit_regs_cn93(struct octep_vf_device *oct) +{ +} + +/* Enable all interrupts */ +static void octep_vf_enable_interrupts_cn93(struct octep_vf_device *oct) +{ +} + +/* Disable all interrupts */ +static void octep_vf_disable_interrupts_cn93(struct octep_vf_device *oct) +{ +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ +static u32 octep_vf_update_iq_read_index_cn93(struct octep_vf_iq *iq) +{ + return 0; +} + +/* Enable a hardware Tx Queue */ +static void octep_vf_enable_iq_cn93(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Enable a hardware Rx Queue */ +static void octep_vf_enable_oq_cn93(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Enable all hardware Tx/Rx Queues assigned to VF */ +static void octep_vf_enable_io_queues_cn93(struct octep_vf_device *oct) +{ +} + +/* Disable a hardware Tx Queue assigned to VF */ +static void octep_vf_disable_iq_cn93(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Disable a hardware Rx Queue assigned to VF */ +static void octep_vf_disable_oq_cn93(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Disable all hardware Tx/Rx Queues assigned to VF */ +static void octep_vf_disable_io_queues_cn93(struct octep_vf_device *oct) +{ +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_vf_dump_registers_cn93(struct octep_vf_device *oct) +{ +} + +/** + * octep_vf_device_setup_cn93() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - set initial configuration and max limits. + */ +void octep_vf_device_setup_cn93(struct octep_vf_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cn93; + oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cn93; + oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cn93; + + oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cn93; + oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cn93; + + oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cn93; + oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cn93; + + oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cn93; + + oct->hw_ops.enable_iq = octep_vf_enable_iq_cn93; + oct->hw_ops.enable_oq = octep_vf_enable_oq_cn93; + oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cn93; + + oct->hw_ops.disable_iq = octep_vf_disable_iq_cn93; + oct->hw_ops.disable_oq = octep_vf_disable_oq_cn93; + oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cn93; + oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cn93; + + oct->hw_ops.dump_registers = octep_vf_dump_registers_cn93; + octep_vf_init_config_cn93_vf(oct); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c new file mode 100644 index 000000000000..af07a4a6edc5 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include +#include + +#include "octep_vf_config.h" +#include "octep_vf_main.h" +#include "octep_vf_regs_cnxk.h" + +/* Reset all hardware Tx/Rx queues */ +static void octep_vf_reset_io_queues_cnxk(struct octep_vf_device *oct) +{ +} + +/* Initialize configuration limits and initial active config */ +static void octep_vf_init_config_cnxk_vf(struct octep_vf_device *oct) +{ + struct octep_vf_config *conf = oct->conf; + u64 reg_val; + + reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(0)); + conf->ring_cfg.max_io_rings = (reg_val >> CNXK_VF_R_IN_CTL_RPVF_POS) & + CNXK_VF_R_IN_CTL_RPVF_MASK; + conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings; + + conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS; + conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR; + conf->iq.db_min = OCTEP_VF_DB_MIN; + conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD; + + conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS; + conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE; + conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD; + conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD; + conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD; + conf->oq.wmark = OCTEP_VF_OQ_WMARK_MIN; + + conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings; +} + +/* Setup registers for a hardware Tx Queue */ +static void octep_vf_setup_iq_regs_cnxk(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Setup registers for a hardware Rx Queue */ +static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Setup registers for a VF mailbox */ +static void octep_vf_setup_mbox_regs_cnxk(struct octep_vf_device *oct, int q_no) +{ +} + +/* Tx/Rx queue interrupt handler */ +static irqreturn_t octep_vf_ioq_intr_handler_cnxk(void *data) +{ + return IRQ_HANDLED; +} + +/* Re-initialize Octeon hardware registers */ +static void octep_vf_reinit_regs_cnxk(struct octep_vf_device *oct) +{ +} + +/* Enable all interrupts */ +static void octep_vf_enable_interrupts_cnxk(struct octep_vf_device *oct) +{ +} + +/* Disable all interrupts */ +static void octep_vf_disable_interrupts_cnxk(struct octep_vf_device *oct) +{ +} + +/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ +static u32 octep_vf_update_iq_read_index_cnxk(struct octep_vf_iq *iq) +{ + return 0; +} + +/* Enable a hardware Tx Queue */ +static void octep_vf_enable_iq_cnxk(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Enable a hardware Rx Queue */ +static void octep_vf_enable_oq_cnxk(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Enable all hardware Tx/Rx Queues assigned to VF */ +static void octep_vf_enable_io_queues_cnxk(struct octep_vf_device *oct) +{ +} + +/* Disable a hardware Tx Queue assigned to VF */ +static void octep_vf_disable_iq_cnxk(struct octep_vf_device *oct, int iq_no) +{ +} + +/* Disable a hardware Rx Queue assigned to VF */ +static void octep_vf_disable_oq_cnxk(struct octep_vf_device *oct, int oq_no) +{ +} + +/* Disable all hardware Tx/Rx Queues assigned to VF */ +static void octep_vf_disable_io_queues_cnxk(struct octep_vf_device *oct) +{ +} + +/* Dump hardware registers (including Tx/Rx queues) for debugging. */ +static void octep_vf_dump_registers_cnxk(struct octep_vf_device *oct) +{ +} + +/** + * octep_vf_device_setup_cnxk() - Setup Octeon device. + * + * @oct: Octeon device private data structure. + * + * - initialize hardware operations. + * - get target side pcie port number for the device. + * - set initial configuration and max limits. + */ +void octep_vf_device_setup_cnxk(struct octep_vf_device *oct) +{ + oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cnxk; + oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cnxk; + oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cnxk; + + oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cnxk; + oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cnxk; + + oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cnxk; + oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cnxk; + + oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cnxk; + + oct->hw_ops.enable_iq = octep_vf_enable_iq_cnxk; + oct->hw_ops.enable_oq = octep_vf_enable_oq_cnxk; + oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cnxk; + + oct->hw_ops.disable_iq = octep_vf_disable_iq_cnxk; + oct->hw_ops.disable_oq = octep_vf_disable_oq_cnxk; + oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cnxk; + oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cnxk; + + oct->hw_ops.dump_registers = octep_vf_dump_registers_cnxk; + octep_vf_init_config_cnxk_vf(oct); +} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h new file mode 100644 index 000000000000..e03a647b0110 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h @@ -0,0 +1,160 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_VF_CONFIG_H_ +#define _OCTEP_VF_CONFIG_H_ + +/* Tx instruction types by length */ +#define OCTEP_VF_32BYTE_INSTR 32 +#define OCTEP_VF_64BYTE_INSTR 64 + +/* Tx Queue: maximum descriptors per ring */ +#define OCTEP_VF_IQ_MAX_DESCRIPTORS 1024 +/* Minimum input (Tx) requests to be enqueued to ring doorbell */ +#define OCTEP_VF_DB_MIN 8 +/* Packet threshold for Tx queue interrupt */ +#define OCTEP_VF_IQ_INTR_THRESHOLD 0x0 + +/* Minimum watermark for backpressure */ +#define OCTEP_VF_OQ_WMARK_MIN 256 + +/* Rx Queue: maximum descriptors per ring */ +#define OCTEP_VF_OQ_MAX_DESCRIPTORS 1024 + +/* Rx buffer size: Use page size buffers. + * Build skb from allocated page buffer once the packet is received. + * When a gathered packet is received, make head page as skb head and + * page buffers in consecutive Rx descriptors as fragments. + */ +#define OCTEP_VF_OQ_BUF_SIZE (SKB_WITH_OVERHEAD(PAGE_SIZE)) +#define OCTEP_VF_OQ_PKTS_PER_INTR 128 +#define OCTEP_VF_OQ_REFILL_THRESHOLD (OCTEP_VF_OQ_MAX_DESCRIPTORS / 4) + +#define OCTEP_VF_OQ_INTR_PKT_THRESHOLD 1 +#define OCTEP_VF_OQ_INTR_TIME_THRESHOLD 10 + +#define OCTEP_VF_MSIX_NAME_SIZE (IFNAMSIZ + 32) + +/* Tx Queue wake threshold + * wakeup a stopped Tx queue if minimum 2 descriptors are available. + * Even a skb with fragments consume only one Tx queue descriptor entry. + */ +#define OCTEP_VF_WAKE_QUEUE_THRESHOLD 2 + +/* Minimum MTU supported by Octeon network interface */ +#define OCTEP_VF_MIN_MTU ETH_MIN_MTU +/* Maximum MTU supported by Octeon interface*/ +#define OCTEP_VF_MAX_MTU (10000 - (ETH_HLEN + ETH_FCS_LEN)) +/* Default MTU */ +#define OCTEP_VF_DEFAULT_MTU 1500 + +/* Macros to get octeon config params */ +#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq) +#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs) +#define CFG_GET_IQ_INSTR_TYPE(cfg) ((cfg)->iq.instr_type) +#define CFG_GET_IQ_INSTR_SIZE(cfg) (64) +#define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min) +#define CFG_GET_IQ_INTR_THRESHOLD(cfg) ((cfg)->iq.intr_threshold) + +#define CFG_GET_OQ_NUM_DESC(cfg) ((cfg)->oq.num_descs) +#define CFG_GET_OQ_BUF_SIZE(cfg) ((cfg)->oq.buf_size) +#define CFG_GET_OQ_REFILL_THRESHOLD(cfg) ((cfg)->oq.refill_threshold) +#define CFG_GET_OQ_INTR_PKT(cfg) ((cfg)->oq.oq_intr_pkt) +#define CFG_GET_OQ_INTR_TIME(cfg) ((cfg)->oq.oq_intr_time) +#define CFG_GET_OQ_WMARK(cfg) ((cfg)->oq.wmark) + +#define CFG_GET_PORTS_ACTIVE_IO_RINGS(cfg) ((cfg)->ring_cfg.active_io_rings) +#define CFG_GET_PORTS_MAX_IO_RINGS(cfg) ((cfg)->ring_cfg.max_io_rings) + +#define CFG_GET_CORE_TICS_PER_US(cfg) ((cfg)->core_cfg.core_tics_per_us) +#define CFG_GET_COPROC_TICS_PER_US(cfg) ((cfg)->core_cfg.coproc_tics_per_us) + +#define CFG_GET_IOQ_MSIX(cfg) ((cfg)->msix_cfg.ioq_msix) + +/* Hardware Tx Queue configuration. */ +struct octep_vf_iq_config { + /* Size of the Input queue (number of commands) */ + u16 num_descs; + + /* Command size - 32 or 64 bytes */ + u16 instr_type; + + /* Minimum number of commands pending to be posted to Octeon before driver + * hits the Input queue doorbell. + */ + u16 db_min; + + /* Trigger the IQ interrupt when processed cmd count reaches + * this level. + */ + u32 intr_threshold; +}; + +/* Hardware Rx Queue configuration. */ +struct octep_vf_oq_config { + /* Size of Output queue (number of descriptors) */ + u16 num_descs; + + /* Size of buffer in this Output queue. */ + u16 buf_size; + + /* The number of buffers that were consumed during packet processing + * by the driver on this Output queue before the driver attempts to + * replenish the descriptor ring with new buffers. + */ + u16 refill_threshold; + + /* Interrupt Coalescing (Packet Count). Octeon will interrupt the host + * only if it sent as many packets as specified by this field. + * The driver usually does not use packet count interrupt coalescing. + */ + u32 oq_intr_pkt; + + /* Interrupt Coalescing (Time Interval). Octeon will interrupt the host + * if at least one packet was sent in the time interval specified by + * this field. The driver uses time interval interrupt coalescing by + * default. The time is specified in microseconds. + */ + u32 oq_intr_time; + + /* Water mark for backpressure. + * Output queue sends backpressure signal to source when + * free buffer count falls below wmark. + */ + u32 wmark; +}; + +/* Tx/Rx configuration */ +struct octep_vf_ring_config { + /* Max number of IOQs */ + u16 max_io_rings; + + /* Number of active IOQs */ + u16 active_io_rings; +}; + +/* Octeon MSI-x config. */ +struct octep_vf_msix_config { + /* Number of IOQ interrupts */ + u16 ioq_msix; +}; + +/* Data Structure to hold configuration limits and active config */ +struct octep_vf_config { + /* Input Queue attributes. */ + struct octep_vf_iq_config iq; + + /* Output Queue attributes. */ + struct octep_vf_oq_config oq; + + /* MSI-X interrupt config */ + struct octep_vf_msix_config msix_cfg; + + /* NIC VF ring Configuration */ + struct octep_vf_ring_config ring_cfg; +}; +#endif /* _OCTEP_VF_CONFIG_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c new file mode 100644 index 000000000000..50a2a13c5f2e --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c @@ -0,0 +1,528 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "octep_vf_config.h" +#include "octep_vf_main.h" + +struct workqueue_struct *octep_vf_wq; + +/* Supported Devices */ +static const struct pci_device_id octep_vf_pci_id_tbl[] = { + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN98_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_VF)}, + {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_VF)}, + {0, }, +}; +MODULE_DEVICE_TABLE(pci, octep_vf_pci_id_tbl); + +MODULE_AUTHOR("Veerasenareddy Burru "); +MODULE_DESCRIPTION(OCTEP_VF_DRV_STRING); +MODULE_LICENSE("GPL"); + +static void octep_vf_link_up(struct net_device *netdev) +{ + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); +} + +static void octep_vf_set_rx_state(struct octep_vf_device *oct, bool up) +{ + int err; + + err = octep_vf_mbox_set_rx_state(oct, up); + if (err) + netdev_err(oct->netdev, "Set Rx state to %d failed with err:%d\n", up, err); +} + +static int octep_vf_get_link_status(struct octep_vf_device *oct) +{ + int err; + + err = octep_vf_mbox_get_link_status(oct, &oct->link_info.oper_up); + if (err) + netdev_err(oct->netdev, "Get link status failed with err:%d\n", err); + return oct->link_info.oper_up; +} + +static void octep_vf_set_link_status(struct octep_vf_device *oct, bool up) +{ + int err; + + err = octep_vf_mbox_set_link_status(oct, up); + if (err) { + netdev_err(oct->netdev, "Set link status to %d failed with err:%d\n", up, err); + return; + } + oct->link_info.oper_up = up; +} + +/** + * octep_vf_open() - start the octeon network device. + * + * @netdev: pointer to kernel network device. + * + * setup Tx/Rx queues, interrupts and enable hardware operation of Tx/Rx queues + * and interrupts.. + * + * Return: 0, on successfully setting up device and bring it up. + * -1, on any error. + */ +static int octep_vf_open(struct net_device *netdev) +{ + struct octep_vf_device *oct = netdev_priv(netdev); + int err, ret; + + netdev_info(netdev, "Starting netdev ...\n"); + netif_carrier_off(netdev); + + oct->hw_ops.reset_io_queues(oct); + + if (octep_vf_setup_iqs(oct)) + goto setup_iq_err; + if (octep_vf_setup_oqs(oct)) + goto setup_oq_err; + + err = netif_set_real_num_tx_queues(netdev, oct->num_oqs); + if (err) + goto set_queues_err; + err = netif_set_real_num_rx_queues(netdev, oct->num_iqs); + if (err) + goto set_queues_err; + + oct->link_info.admin_up = 1; + octep_vf_set_rx_state(oct, true); + + ret = octep_vf_get_link_status(oct); + if (!ret) + octep_vf_set_link_status(oct, true); + + /* Enable the input and output queues for this Octeon device */ + oct->hw_ops.enable_io_queues(oct); + + /* Enable Octeon device interrupts */ + oct->hw_ops.enable_interrupts(oct); + + octep_vf_oq_dbell_init(oct); + + ret = octep_vf_get_link_status(oct); + if (ret) + octep_vf_link_up(netdev); + + return 0; + +set_queues_err: + octep_vf_free_oqs(oct); +setup_oq_err: + octep_vf_free_iqs(oct); +setup_iq_err: + return -1; +} + +/** + * octep_vf_stop() - stop the octeon network device. + * + * @netdev: pointer to kernel network device. + * + * stop the device Tx/Rx operations, bring down the link and + * free up all resources allocated for Tx/Rx queues and interrupts. + */ +static int octep_vf_stop(struct net_device *netdev) +{ + struct octep_vf_device *oct = netdev_priv(netdev); + + netdev_info(netdev, "Stopping the device ...\n"); + + /* Stop Tx from stack */ + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + octep_vf_set_link_status(oct, false); + octep_vf_set_rx_state(oct, false); + + oct->link_info.admin_up = 0; + oct->link_info.oper_up = 0; + + oct->hw_ops.disable_interrupts(oct); + + octep_vf_clean_iqs(oct); + + oct->hw_ops.disable_io_queues(oct); + oct->hw_ops.reset_io_queues(oct); + octep_vf_free_oqs(oct); + octep_vf_free_iqs(oct); + netdev_info(netdev, "Device stopped !!\n"); + return 0; +} + +/** + * octep_vf_start_xmit() - Enqueue packet to Octoen hardware Tx Queue. + * + * @skb: packet skbuff pointer. + * @netdev: kernel network device. + * + * Return: NETDEV_TX_BUSY, if Tx Queue is full. + * NETDEV_TX_OK, if successfully enqueued to hardware Tx queue. + */ +static netdev_tx_t octep_vf_start_xmit(struct sk_buff *skb, + struct net_device *netdev) +{ + return NETDEV_TX_OK; +} + +/** + * octep_vf_tx_timeout_task - work queue task to Handle Tx queue timeout. + * + * @work: pointer to Tx queue timeout work_struct + * + * Stop and start the device so that it frees up all queue resources + * and restarts the queues, that potentially clears a Tx queue timeout + * condition. + **/ +static void octep_vf_tx_timeout_task(struct work_struct *work) +{ + struct octep_vf_device *oct = container_of(work, struct octep_vf_device, + tx_timeout_task); + struct net_device *netdev = oct->netdev; + + rtnl_lock(); + if (netif_running(netdev)) { + octep_vf_stop(netdev); + octep_vf_open(netdev); + } + rtnl_unlock(); +} + +/** + * octep_vf_tx_timeout() - Handle Tx Queue timeout. + * + * @netdev: pointer to kernel network device. + * @txqueue: Timed out Tx queue number. + * + * Schedule a work to handle Tx queue timeout. + */ +static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue) +{ + struct octep_vf_device *oct = netdev_priv(netdev); + + queue_work(octep_vf_wq, &oct->tx_timeout_task); +} + +static const struct net_device_ops octep_vf_netdev_ops = { + .ndo_open = octep_vf_open, + .ndo_stop = octep_vf_stop, + .ndo_start_xmit = octep_vf_start_xmit, + .ndo_tx_timeout = octep_vf_tx_timeout, +}; + +static const char *octep_vf_devid_to_str(struct octep_vf_device *oct) +{ + switch (oct->chip_id) { + case OCTEP_PCI_DEVICE_ID_CN93_VF: + return "CN93XX"; + case OCTEP_PCI_DEVICE_ID_CNF95N_VF: + return "CNF95N"; + case OCTEP_PCI_DEVICE_ID_CN10KA_VF: + return "CN10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KA_VF: + return "CNF10KA"; + case OCTEP_PCI_DEVICE_ID_CNF10KB_VF: + return "CNF10KB"; + case OCTEP_PCI_DEVICE_ID_CN10KB_VF: + return "CN10KB"; + default: + return "Unsupported"; + } +} + +/** + * octep_vf_device_setup() - Setup Octeon Device. + * + * @oct: Octeon device private data structure. + * + * Setup Octeon device hardware operations, configuration, etc ... + */ +int octep_vf_device_setup(struct octep_vf_device *oct) +{ + struct pci_dev *pdev = oct->pdev; + + /* allocate memory for oct->conf */ + oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); + if (!oct->conf) + return -ENOMEM; + + /* Map BAR region 0 */ + oct->mmio.hw_addr = ioremap(pci_resource_start(oct->pdev, 0), + pci_resource_len(oct->pdev, 0)); + if (!oct->mmio.hw_addr) { + dev_err(&pdev->dev, + "Failed to remap BAR0; start=0x%llx len=0x%llx\n", + pci_resource_start(oct->pdev, 0), + pci_resource_len(oct->pdev, 0)); + goto ioremap_err; + } + oct->mmio.mapped = 1; + + oct->chip_id = pdev->device; + oct->rev_id = pdev->revision; + dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device); + + switch (oct->chip_id) { + case OCTEP_PCI_DEVICE_ID_CN93_VF: + case OCTEP_PCI_DEVICE_ID_CNF95N_VF: + case OCTEP_PCI_DEVICE_ID_CN98_VF: + dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n", + octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct), + OCTEP_VF_MINOR_REV(oct)); + octep_vf_device_setup_cn93(oct); + break; + case OCTEP_PCI_DEVICE_ID_CNF10KA_VF: + case OCTEP_PCI_DEVICE_ID_CN10KA_VF: + case OCTEP_PCI_DEVICE_ID_CNF10KB_VF: + case OCTEP_PCI_DEVICE_ID_CN10KB_VF: + dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n", + octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct), + OCTEP_VF_MINOR_REV(oct)); + octep_vf_device_setup_cnxk(oct); + break; + default: + dev_err(&pdev->dev, "Unsupported device\n"); + goto unsupported_dev; + } + + return 0; + +unsupported_dev: + iounmap(oct->mmio.hw_addr); +ioremap_err: + kfree(oct->conf); + return -EOPNOTSUPP; +} + +/** + * octep_vf_device_cleanup() - Cleanup Octeon Device. + * + * @oct: Octeon device private data structure. + * + * Cleanup Octeon device allocated resources. + */ +static void octep_vf_device_cleanup(struct octep_vf_device *oct) +{ + dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n"); + + if (oct->mmio.mapped) + iounmap(oct->mmio.hw_addr); + + kfree(oct->conf); + oct->conf = NULL; +} + +static int octep_vf_get_mac_addr(struct octep_vf_device *oct, u8 *addr) +{ + return octep_vf_mbox_get_mac_addr(oct, addr); +} + +/** + * octep_vf_probe() - Octeon PCI device probe handler. + * + * @pdev: PCI device structure. + * @ent: entry in Octeon PCI device ID table. + * + * Initializes and enables the Octeon PCI device for network operations. + * Initializes Octeon private data structure and registers a network device. + */ +static int octep_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct octep_vf_device *octep_vf_dev; + struct net_device *netdev; + int err; + + err = pci_enable_device(pdev); + if (err) { + dev_err(&pdev->dev, "Failed to enable PCI device\n"); + return err; + } + + err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (err) { + dev_err(&pdev->dev, "Failed to set DMA mask !!\n"); + goto err_dma_mask; + } + + err = pci_request_mem_regions(pdev, OCTEP_VF_DRV_NAME); + if (err) { + dev_err(&pdev->dev, "Failed to map PCI memory regions\n"); + goto err_pci_regions; + } + + pci_set_master(pdev); + + netdev = alloc_etherdev_mq(sizeof(struct octep_vf_device), + OCTEP_VF_MAX_QUEUES); + if (!netdev) { + dev_err(&pdev->dev, "Failed to allocate netdev\n"); + err = -ENOMEM; + goto err_alloc_netdev; + } + SET_NETDEV_DEV(netdev, &pdev->dev); + + octep_vf_dev = netdev_priv(netdev); + octep_vf_dev->netdev = netdev; + octep_vf_dev->pdev = pdev; + octep_vf_dev->dev = &pdev->dev; + pci_set_drvdata(pdev, octep_vf_dev); + + err = octep_vf_device_setup(octep_vf_dev); + if (err) { + dev_err(&pdev->dev, "Device setup failed\n"); + goto err_octep_vf_config; + } + INIT_WORK(&octep_vf_dev->tx_timeout_task, octep_vf_tx_timeout_task); + + netdev->netdev_ops = &octep_vf_netdev_ops; + netif_carrier_off(netdev); + + if (octep_vf_setup_mbox(octep_vf_dev)) { + dev_err(&pdev->dev, "VF Mailbox setup failed\n"); + err = -ENOMEM; + goto err_setup_mbox; + } + + if (octep_vf_mbox_version_check(octep_vf_dev)) { + dev_err(&pdev->dev, "PF VF Mailbox version mismatch\n"); + err = -EINVAL; + goto err_mbox_version; + } + + netdev->hw_features = NETIF_F_SG; + netdev->min_mtu = OCTEP_VF_MIN_MTU; + netdev->max_mtu = OCTEP_VF_MAX_MTU; + netdev->mtu = OCTEP_VF_DEFAULT_MTU; + + netdev->features |= netdev->hw_features; + octep_vf_get_mac_addr(octep_vf_dev, octep_vf_dev->mac_addr); + eth_hw_addr_set(netdev, octep_vf_dev->mac_addr); + err = register_netdev(netdev); + if (err) { + dev_err(&pdev->dev, "Failed to register netdev\n"); + goto err_register_dev; + } + dev_info(&pdev->dev, "Device probe successful\n"); + return 0; + +err_register_dev: +err_mbox_version: + octep_vf_delete_mbox(octep_vf_dev); +err_setup_mbox: + octep_vf_device_cleanup(octep_vf_dev); +err_octep_vf_config: + free_netdev(netdev); +err_alloc_netdev: + pci_release_mem_regions(pdev); +err_pci_regions: +err_dma_mask: + pci_disable_device(pdev); + dev_err(&pdev->dev, "Device probe failed\n"); + return err; +} + +/** + * octep_vf_remove() - Remove Octeon PCI device from driver control. + * + * @pdev: PCI device structure of the Octeon device. + * + * Cleanup all resources allocated for the Octeon device. + * Unregister from network device and disable the PCI device. + */ +static void octep_vf_remove(struct pci_dev *pdev) +{ + struct octep_vf_device *oct = pci_get_drvdata(pdev); + struct net_device *netdev; + + if (!oct) + return; + + octep_vf_mbox_dev_remove(oct); + cancel_work_sync(&oct->tx_timeout_task); + netdev = oct->netdev; + if (netdev->reg_state == NETREG_REGISTERED) + unregister_netdev(netdev); + octep_vf_delete_mbox(oct); + octep_vf_device_cleanup(oct); + pci_release_mem_regions(pdev); + free_netdev(netdev); + pci_disable_device(pdev); +} + +static struct pci_driver octep_vf_driver = { + .name = OCTEP_VF_DRV_NAME, + .id_table = octep_vf_pci_id_tbl, + .probe = octep_vf_probe, + .remove = octep_vf_remove, +}; + +/** + * octep_vf_init_module() - Module initialization. + * + * create common resource for the driver and register PCI driver. + */ +static int __init octep_vf_init_module(void) +{ + int ret; + + pr_info("%s: Loading %s ...\n", OCTEP_VF_DRV_NAME, OCTEP_VF_DRV_STRING); + + /* work queue for all deferred tasks */ + octep_vf_wq = create_singlethread_workqueue(OCTEP_VF_DRV_NAME); + if (!octep_vf_wq) { + pr_err("%s: Failed to create common workqueue\n", + OCTEP_VF_DRV_NAME); + return -ENOMEM; + } + + ret = pci_register_driver(&octep_vf_driver); + if (ret < 0) { + pr_err("%s: Failed to register PCI driver; err=%d\n", + OCTEP_VF_DRV_NAME, ret); + return ret; + } + + pr_info("%s: Loaded successfully !\n", OCTEP_VF_DRV_NAME); + + return ret; +} + +/** + * octep_vf_exit_module() - Module exit routine. + * + * unregister the driver with PCI subsystem and cleanup common resources. + */ +static void __exit octep_vf_exit_module(void) +{ + pr_info("%s: Unloading ...\n", OCTEP_VF_DRV_NAME); + + pci_unregister_driver(&octep_vf_driver); + destroy_workqueue(octep_vf_wq); + + pr_info("%s: Unloading complete\n", OCTEP_VF_DRV_NAME); +} + +module_init(octep_vf_init_module); +module_exit(octep_vf_exit_module); diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h new file mode 100644 index 000000000000..ad1dc0a49614 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h @@ -0,0 +1,336 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_VF_MAIN_H_ +#define _OCTEP_VF_MAIN_H_ + +#include "octep_vf_tx.h" +#include "octep_vf_rx.h" +#include "octep_vf_mbox.h" + +#define OCTEP_VF_DRV_NAME "octeon_ep_vf" +#define OCTEP_VF_DRV_STRING "Marvell Octeon EndPoint NIC VF Driver" + +#define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203 //93xx VF +#define OCTEP_PCI_DEVICE_ID_CNF95N_VF 0xB403 //95N VF +#define OCTEP_PCI_DEVICE_ID_CN98_VF 0xB103 +#define OCTEP_PCI_DEVICE_ID_CN10KA_VF 0xB903 +#define OCTEP_PCI_DEVICE_ID_CNF10KA_VF 0xBA03 +#define OCTEP_PCI_DEVICE_ID_CNF10KB_VF 0xBC03 +#define OCTEP_PCI_DEVICE_ID_CN10KB_VF 0xBD03 + +#define OCTEP_VF_MAX_QUEUES 63 +#define OCTEP_VF_MAX_IQ OCTEP_VF_MAX_QUEUES +#define OCTEP_VF_MAX_OQ OCTEP_VF_MAX_QUEUES + +#define OCTEP_VF_MAX_MSIX_VECTORS OCTEP_VF_MAX_OQ + +#define OCTEP_VF_IQ_INTR_RESEND_BIT 59 +#define OCTEP_VF_OQ_INTR_RESEND_BIT 59 + +#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \ + ((iq__)->host_write_index - (iq__)->flush_index) & \ + (iq__)->ring_size_mask; \ + }) +#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \ + (iq_)->max_count - IQ_INSTR_PENDING(iq_); \ + }) + +#ifndef UINT64_MAX +#define UINT64_MAX ((u64)(~((u64)0))) /* 0xFFFFFFFFFFFFFFFF */ +#endif + +/* PCI address space mapping information. + * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of + * Octeon gets mapped to different physical address spaces in + * the kernel. + */ +struct octep_vf_mmio { + /* The physical address to which the PCI address space is mapped. */ + u8 __iomem *hw_addr; + + /* Flag indicating the mapping was successful. */ + int mapped; +}; + +struct octep_vf_hw_ops { + void (*setup_iq_regs)(struct octep_vf_device *oct, int q); + void (*setup_oq_regs)(struct octep_vf_device *oct, int q); + void (*setup_mbox_regs)(struct octep_vf_device *oct, int mbox); + + irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector); + irqreturn_t (*ioq_intr_handler)(void *ioq_vector); + void (*reinit_regs)(struct octep_vf_device *oct); + u32 (*update_iq_read_idx)(struct octep_vf_iq *iq); + + void (*enable_interrupts)(struct octep_vf_device *oct); + void (*disable_interrupts)(struct octep_vf_device *oct); + + void (*enable_io_queues)(struct octep_vf_device *oct); + void (*disable_io_queues)(struct octep_vf_device *oct); + void (*enable_iq)(struct octep_vf_device *oct, int q); + void (*disable_iq)(struct octep_vf_device *oct, int q); + void (*enable_oq)(struct octep_vf_device *oct, int q); + void (*disable_oq)(struct octep_vf_device *oct, int q); + void (*reset_io_queues)(struct octep_vf_device *oct); + void (*dump_registers)(struct octep_vf_device *oct); +}; + +/* Octeon mailbox data */ +struct octep_vf_mbox_data { + /* Holds the offset of received data via mailbox. */ + u32 data_index; + + /* Holds the received data via mailbox. */ + u8 recv_data[OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE]; +}; + +/* wrappers around work structs */ +struct octep_vf_mbox_wk { + struct work_struct work; + void *ctxptr; +}; + +/* Octeon device mailbox */ +struct octep_vf_mbox { + /* A mutex to protect access to this q_mbox. */ + struct mutex lock; + + u32 state; + + /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */ + u8 __iomem *mbox_int_reg; + + /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF, + * SLI_PKT_PF_VF_MBOX_SIG(1) for VF. + */ + u8 __iomem *mbox_write_reg; + + /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF, + * SLI_PKT_PF_VF_MBOX_SIG(0) for VF. + */ + u8 __iomem *mbox_read_reg; + + /* Octeon mailbox data */ + struct octep_vf_mbox_data mbox_data; + + /* Octeon mailbox work handler to process Mbox messages */ + struct octep_vf_mbox_wk wk; +}; + +/* Tx/Rx queue vector per interrupt. */ +struct octep_vf_ioq_vector { + char name[OCTEP_VF_MSIX_NAME_SIZE]; + struct napi_struct napi; + struct octep_vf_device *octep_vf_dev; + struct octep_vf_iq *iq; + struct octep_vf_oq *oq; + cpumask_t affinity_mask; +}; + +/* Octeon hardware/firmware offload capability flags. */ +#define OCTEP_VF_CAP_TX_CHECKSUM BIT(0) +#define OCTEP_VF_CAP_RX_CHECKSUM BIT(1) +#define OCTEP_VF_CAP_TSO BIT(2) + +/* Link modes */ +enum octep_vf_link_mode_bit_indices { + OCTEP_VF_LINK_MODE_10GBASE_T = 0, + OCTEP_VF_LINK_MODE_10GBASE_R, + OCTEP_VF_LINK_MODE_10GBASE_CR, + OCTEP_VF_LINK_MODE_10GBASE_KR, + OCTEP_VF_LINK_MODE_10GBASE_LR, + OCTEP_VF_LINK_MODE_10GBASE_SR, + OCTEP_VF_LINK_MODE_25GBASE_CR, + OCTEP_VF_LINK_MODE_25GBASE_KR, + OCTEP_VF_LINK_MODE_25GBASE_SR, + OCTEP_VF_LINK_MODE_40GBASE_CR4, + OCTEP_VF_LINK_MODE_40GBASE_KR4, + OCTEP_VF_LINK_MODE_40GBASE_LR4, + OCTEP_VF_LINK_MODE_40GBASE_SR4, + OCTEP_VF_LINK_MODE_50GBASE_CR2, + OCTEP_VF_LINK_MODE_50GBASE_KR2, + OCTEP_VF_LINK_MODE_50GBASE_SR2, + OCTEP_VF_LINK_MODE_50GBASE_CR, + OCTEP_VF_LINK_MODE_50GBASE_KR, + OCTEP_VF_LINK_MODE_50GBASE_LR, + OCTEP_VF_LINK_MODE_50GBASE_SR, + OCTEP_VF_LINK_MODE_100GBASE_CR4, + OCTEP_VF_LINK_MODE_100GBASE_KR4, + OCTEP_VF_LINK_MODE_100GBASE_LR4, + OCTEP_VF_LINK_MODE_100GBASE_SR4, + OCTEP_VF_LINK_MODE_NBITS +}; + +/* Hardware interface link state information. */ +struct octep_vf_iface_link_info { + /* Bitmap of Supported link speeds/modes. */ + u64 supported_modes; + + /* Bitmap of Advertised link speeds/modes. */ + u64 advertised_modes; + + /* Negotiated link speed in Mbps. */ + u32 speed; + + /* MTU */ + u16 mtu; + + /* Autonegotiation state. */ +#define OCTEP_VF_LINK_MODE_AUTONEG_SUPPORTED BIT(0) +#define OCTEP_VF_LINK_MODE_AUTONEG_ADVERTISED BIT(1) + u8 autoneg; + + /* Pause frames setting. */ +#define OCTEP_VF_LINK_MODE_PAUSE_SUPPORTED BIT(0) +#define OCTEP_VF_LINK_MODE_PAUSE_ADVERTISED BIT(1) + u8 pause; + + /* Admin state of the link (ifconfig up/down */ + u8 admin_up; + + /* Operational state of the link: physical link is up down */ + u8 oper_up; +}; + +/* Hardware interface stats information. */ +struct octep_vf_iface_rxtx_stats { + /* Hardware Interface Rx statistics */ + struct octep_vf_iface_rx_stats iface_rx_stats; + + /* Hardware Interface Tx statistics */ + struct octep_vf_iface_tx_stats iface_tx_stats; +}; + +struct octep_vf_fw_info { + /* pkind value to be used in every Tx hardware descriptor */ + u8 pkind; + /* front size data */ + u8 fsz; + /* supported rx offloads OCTEP_VF_RX_OFFLOAD_* */ + u16 rx_ol_flags; + /* supported tx offloads OCTEP_VF_TX_OFFLOAD_* */ + u16 tx_ol_flags; +}; + +/* The Octeon device specific private data structure. + * Each Octeon device has this structure to represent all its components. + */ +struct octep_vf_device { + struct octep_vf_config *conf; + + /* Octeon Chip type. */ + u16 chip_id; + u16 rev_id; + + /* Device capabilities enabled */ + u64 caps_enabled; + /* Device capabilities supported */ + u64 caps_supported; + + /* Pointer to basic Linux device */ + struct device *dev; + /* Linux PCI device pointer */ + struct pci_dev *pdev; + /* Netdev corresponding to the Octeon device */ + struct net_device *netdev; + + /* memory mapped io range */ + struct octep_vf_mmio mmio; + + /* MAC address */ + u8 mac_addr[ETH_ALEN]; + + /* Tx queues (IQ: Instruction Queue) */ + u16 num_iqs; + /* Pointers to Octeon Tx queues */ + struct octep_vf_iq *iq[OCTEP_VF_MAX_IQ]; + + /* Rx queues (OQ: Output Queue) */ + u16 num_oqs; + /* Pointers to Octeon Rx queues */ + struct octep_vf_oq *oq[OCTEP_VF_MAX_OQ]; + + /* Hardware port number of the PCIe interface */ + u16 pcie_port; + + /* Hardware operations */ + struct octep_vf_hw_ops hw_ops; + + /* IRQ info */ + u16 num_irqs; + u16 num_non_ioq_irqs; + char *non_ioq_irq_names; + struct msix_entry *msix_entries; + /* IOq information of it's corresponding MSI-X interrupt. */ + struct octep_vf_ioq_vector *ioq_vector[OCTEP_VF_MAX_QUEUES]; + + /* Hardware Interface Tx statistics */ + struct octep_vf_iface_tx_stats iface_tx_stats; + /* Hardware Interface Rx statistics */ + struct octep_vf_iface_rx_stats iface_rx_stats; + + /* Hardware Interface Link info like supported modes, aneg support */ + struct octep_vf_iface_link_info link_info; + + /* Mailbox to talk to VFs */ + struct octep_vf_mbox *mbox; + + /* Work entry to handle Tx timeout */ + struct work_struct tx_timeout_task; + + /* offset for iface stats */ + u32 ctrl_mbox_ifstats_offset; + + /* Negotiated Mbox version */ + u32 mbox_neg_ver; + + /* firmware info */ + struct octep_vf_fw_info fw_info; +}; + +static inline u16 OCTEP_VF_MAJOR_REV(struct octep_vf_device *oct) +{ + u16 rev = (oct->rev_id & 0xC) >> 2; + + return (rev == 0) ? 1 : rev; +} + +static inline u16 OCTEP_VF_MINOR_REV(struct octep_vf_device *oct) +{ + return (oct->rev_id & 0x3); +} + +/* Octeon CSR read/write access APIs */ +#define octep_vf_write_csr(octep_vf_dev, reg_off, value) \ + writel(value, (octep_vf_dev)->mmio.hw_addr + (reg_off)) + +#define octep_vf_write_csr64(octep_vf_dev, reg_off, val64) \ + writeq(val64, (octep_vf_dev)->mmio.hw_addr + (reg_off)) + +#define octep_vf_read_csr(octep_vf_dev, reg_off) \ + readl((octep_vf_dev)->mmio.hw_addr + (reg_off)) + +#define octep_vf_read_csr64(octep_vf_dev, reg_off) \ + readq((octep_vf_dev)->mmio.hw_addr + (reg_off)) + +extern struct workqueue_struct *octep_vf_wq; + +int octep_vf_device_setup(struct octep_vf_device *oct); +int octep_vf_setup_iqs(struct octep_vf_device *oct); +void octep_vf_free_iqs(struct octep_vf_device *oct); +void octep_vf_clean_iqs(struct octep_vf_device *oct); +int octep_vf_setup_oqs(struct octep_vf_device *oct); +void octep_vf_free_oqs(struct octep_vf_device *oct); +void octep_vf_oq_dbell_init(struct octep_vf_device *oct); +void octep_vf_device_setup_cn93(struct octep_vf_device *oct); +void octep_vf_device_setup_cnxk(struct octep_vf_device *oct); +int octep_vf_iq_process_completions(struct octep_vf_iq *iq, u16 budget); +int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget); +int octep_vf_get_if_stats(struct octep_vf_device *oct); +void octep_vf_mbox_work(struct work_struct *work); +#endif /* _OCTEP_VF_MAIN_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c new file mode 100644 index 000000000000..1c1fe293fc50 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#include +#include +#include +#include "octep_vf_config.h" +#include "octep_vf_main.h" + +int octep_vf_setup_mbox(struct octep_vf_device *oct) +{ + int ring = 0; + + oct->mbox = vzalloc(sizeof(*oct->mbox)); + if (!oct->mbox) + return -1; + + mutex_init(&oct->mbox->lock); + + oct->hw_ops.setup_mbox_regs(oct, ring); + INIT_WORK(&oct->mbox->wk.work, octep_vf_mbox_work); + oct->mbox->wk.ctxptr = oct; + dev_info(&oct->pdev->dev, "setup vf mbox successfully\n"); + return 0; +} + +void octep_vf_delete_mbox(struct octep_vf_device *oct) +{ + if (oct->mbox) { + if (work_pending(&oct->mbox->wk.work)) + cancel_work_sync(&oct->mbox->wk.work); + + mutex_destroy(&oct->mbox->lock); + vfree(oct->mbox); + oct->mbox = NULL; + dev_info(&oct->pdev->dev, "Deleted vf mbox successfully\n"); + } +} + +int octep_vf_mbox_version_check(struct octep_vf_device *oct) +{ + return 0; +} + +void octep_vf_mbox_work(struct work_struct *work) +{ +} + +int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu) +{ + return 0; +} + +int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr) +{ + return 0; +} + +int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr) +{ + return 0; +} + +int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state) +{ + return 0; +} + +int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status) +{ + return 0; +} + +int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up) +{ + return 0; +} + +int octep_vf_mbox_dev_remove(struct octep_vf_device *oct) +{ + return 0; +} + +int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct) +{ + return 0; +} + +int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads, + u16 rx_offloads) +{ + return 0; +} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h new file mode 100644 index 000000000000..14f4fb19445b --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#ifndef _OCTEP_VF_MBOX_H_ +#define _OCTEP_VF_MBOX_H_ + +#define OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE 256 + +int octep_vf_setup_mbox(struct octep_vf_device *oct); +void octep_vf_delete_mbox(struct octep_vf_device *oct); +int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu); +int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr); +int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr); +int octep_vf_mbox_version_check(struct octep_vf_device *oct); +int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state); +int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status); +int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up); +int octep_vf_mbox_dev_remove(struct octep_vf_device *oct); +int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct); +int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads, u16 rx_offloads); + +#endif diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h new file mode 100644 index 000000000000..25e2a876ebba --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h @@ -0,0 +1,154 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#ifndef _OCTEP_VF_REGS_CN9K_H_ +#define _OCTEP_VF_REGS_CN9K_H_ + +/*############################ RST #########################*/ +#define CN93_VF_CONFIG_XPANSION_BAR 0x38 +#define CN93_VF_CONFIG_PCIE_CAP 0x70 +#define CN93_VF_CONFIG_PCIE_DEVCAP 0x74 +#define CN93_VF_CONFIG_PCIE_DEVCTL 0x78 +#define CN93_VF_CONFIG_PCIE_LINKCAP 0x7C +#define CN93_VF_CONFIG_PCIE_LINKCTL 0x80 +#define CN93_VF_CONFIG_PCIE_SLOTCAP 0x84 +#define CN93_VF_CONFIG_PCIE_SLOTCTL 0x88 + +#define CN93_VF_RING_OFFSET BIT_ULL(17) + +/*###################### RING IN REGISTERS #########################*/ +#define CN93_VF_SDP_R_IN_CONTROL_START 0x10000 +#define CN93_VF_SDP_R_IN_ENABLE_START 0x10010 +#define CN93_VF_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CN93_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CN93_VF_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CN93_VF_SDP_R_IN_CNTS_START 0x10050 +#define CN93_VF_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CN93_VF_SDP_R_IN_PKT_CNT_START 0x10080 +#define CN93_VF_SDP_R_IN_BYTE_CNT_START 0x10090 + +#define CN93_VF_SDP_R_IN_CONTROL(ring) \ + (CN93_VF_SDP_R_IN_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_ENABLE(ring) \ + (CN93_VF_SDP_R_IN_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_INSTR_BADDR(ring) \ + (CN93_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_INSTR_RSIZE(ring) \ + (CN93_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_INSTR_DBELL(ring) \ + (CN93_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_CNTS(ring) \ + (CN93_VF_SDP_R_IN_CNTS_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_INT_LEVELS(ring) \ + (CN93_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_PKT_CNT(ring) \ + (CN93_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_IN_BYTE_CNT(ring) \ + (CN93_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) + +/*------------------ R_IN Masks ----------------*/ + +/** Rings per Virtual Function **/ +#define CN93_VF_R_IN_CTL_RPVF_MASK (0xF) +#define CN93_VF_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + **/ +#define CN93_VF_R_IN_CTL_IDLE BIT_ULL(28) +#define CN93_VF_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CN93_VF_R_IN_CTL_IS_64B BIT_ULL(24) +#define CN93_VF_R_IN_CTL_D_NSR BIT_ULL(8) +#define CN93_VF_R_IN_CTL_D_ESR BIT_ULL(6) +#define CN93_VF_R_IN_CTL_D_ROR BIT_ULL(5) +#define CN93_VF_R_IN_CTL_NSR BIT_ULL(3) +#define CN93_VF_R_IN_CTL_ESR BIT_ULL(1) +#define CN93_VF_R_IN_CTL_ROR BIT_ULL(0) + +#define CN93_VF_R_IN_CTL_MASK (CN93_VF_R_IN_CTL_RDSIZE | CN93_VF_R_IN_CTL_IS_64B) + +/*###################### RING OUT REGISTERS #########################*/ +#define CN93_VF_SDP_R_OUT_CNTS_START 0x10100 +#define CN93_VF_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CN93_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CN93_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CN93_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CN93_VF_SDP_R_OUT_CONTROL_START 0x10150 +#define CN93_VF_SDP_R_OUT_ENABLE_START 0x10160 +#define CN93_VF_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CN93_VF_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CN93_VF_SDP_R_OUT_CONTROL(ring) \ + (CN93_VF_SDP_R_OUT_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_ENABLE(ring) \ + (CN93_VF_SDP_R_OUT_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_SLIST_BADDR(ring) \ + (CN93_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CN93_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_SLIST_DBELL(ring) \ + (CN93_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_CNTS(ring) \ + (CN93_VF_SDP_R_OUT_CNTS_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_INT_LEVELS(ring) \ + (CN93_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_PKT_CNT(ring) \ + (CN93_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_OUT_BYTE_CNT(ring) \ + (CN93_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CN93_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CN93_VF_R_OUT_INT_LEVELS_TIMET (32) + +#define CN93_VF_R_OUT_CTL_IDLE BIT_ULL(40) +#define CN93_VF_R_OUT_CTL_ES_I BIT_ULL(34) +#define CN93_VF_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CN93_VF_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CN93_VF_R_OUT_CTL_ES_D BIT_ULL(30) +#define CN93_VF_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CN93_VF_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CN93_VF_R_OUT_CTL_ES_P BIT_ULL(26) +#define CN93_VF_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CN93_VF_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CN93_VF_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ##################### Mail Box Registers ########################## */ +/* SDP PF to VF Mailbox Data Register */ +#define CN93_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +/* SDP Packet PF to VF Mailbox Interrupt Register */ +#define CN93_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220 +/* SDP VF to PF Mailbox Data Register */ +#define CN93_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CN93_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1) +#define CN93_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0) + +#define CN93_VF_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CN93_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_MBOX_PF_VF_INT(ring) \ + (CN93_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CN93_VF_RING_OFFSET)) + +#define CN93_VF_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CN93_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_VF_RING_OFFSET)) +#endif /* _OCTEP_VF_REGS_CN9K_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h new file mode 100644 index 000000000000..2e156745ef64 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ +#ifndef _OCTEP_VF_REGS_CNXK_H_ +#define _OCTEP_VF_REGS_CNXK_H_ + +/*############################ RST #########################*/ +#define CNXK_VF_CONFIG_XPANSION_BAR 0x38 +#define CNXK_VF_CONFIG_PCIE_CAP 0x70 +#define CNXK_VF_CONFIG_PCIE_DEVCAP 0x74 +#define CNXK_VF_CONFIG_PCIE_DEVCTL 0x78 +#define CNXK_VF_CONFIG_PCIE_LINKCAP 0x7C +#define CNXK_VF_CONFIG_PCIE_LINKCTL 0x80 +#define CNXK_VF_CONFIG_PCIE_SLOTCAP 0x84 +#define CNXK_VF_CONFIG_PCIE_SLOTCTL 0x88 + +#define CNXK_VF_RING_OFFSET (0x1ULL << 17) + +/*###################### RING IN REGISTERS #########################*/ +#define CNXK_VF_SDP_R_IN_CONTROL_START 0x10000 +#define CNXK_VF_SDP_R_IN_ENABLE_START 0x10010 +#define CNXK_VF_SDP_R_IN_INSTR_BADDR_START 0x10020 +#define CNXK_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030 +#define CNXK_VF_SDP_R_IN_INSTR_DBELL_START 0x10040 +#define CNXK_VF_SDP_R_IN_CNTS_START 0x10050 +#define CNXK_VF_SDP_R_IN_INT_LEVELS_START 0x10060 +#define CNXK_VF_SDP_R_IN_PKT_CNT_START 0x10080 +#define CNXK_VF_SDP_R_IN_BYTE_CNT_START 0x10090 +#define CNXK_VF_SDP_R_ERR_TYPE_START 0x10400 + +#define CNXK_VF_SDP_R_ERR_TYPE(ring) \ + (CNXK_VF_SDP_R_ERR_TYPE_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_CONTROL(ring) \ + (CNXK_VF_SDP_R_IN_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_ENABLE(ring) \ + (CNXK_VF_SDP_R_IN_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_INSTR_BADDR(ring) \ + (CNXK_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_INSTR_RSIZE(ring) \ + (CNXK_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_INSTR_DBELL(ring) \ + (CNXK_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_CNTS(ring) \ + (CNXK_VF_SDP_R_IN_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_INT_LEVELS(ring) \ + (CNXK_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_PKT_CNT(ring) \ + (CNXK_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_IN_BYTE_CNT(ring) \ + (CNXK_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) + +/*------------------ R_IN Masks ----------------*/ + +/** Rings per Virtual Function **/ +#define CNXK_VF_R_IN_CTL_RPVF_MASK (0xF) +#define CNXK_VF_R_IN_CTL_RPVF_POS (48) + +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + **/ +#define CNXK_VF_R_IN_CTL_IDLE (0x1ULL << 28) +#define CNXK_VF_R_IN_CTL_RDSIZE (0x3ULL << 25) +#define CNXK_VF_R_IN_CTL_IS_64B (0x1ULL << 24) +#define CNXK_VF_R_IN_CTL_D_NSR (0x1ULL << 8) +#define CNXK_VF_R_IN_CTL_D_ESR (0x1ULL << 6) +#define CNXK_VF_R_IN_CTL_D_ROR (0x1ULL << 5) +#define CNXK_VF_R_IN_CTL_NSR (0x1ULL << 3) +#define CNXK_VF_R_IN_CTL_ESR (0x1ULL << 1) +#define CNXK_VF_R_IN_CTL_ROR (0x1ULL << 0) + +#define CNXK_VF_R_IN_CTL_MASK (CNXK_VF_R_IN_CTL_RDSIZE | CNXK_VF_R_IN_CTL_IS_64B) + +/*###################### RING OUT REGISTERS #########################*/ +#define CNXK_VF_SDP_R_OUT_CNTS_START 0x10100 +#define CNXK_VF_SDP_R_OUT_INT_LEVELS_START 0x10110 +#define CNXK_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120 +#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130 +#define CNXK_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140 +#define CNXK_VF_SDP_R_OUT_CONTROL_START 0x10150 +#define CNXK_VF_SDP_R_OUT_WMARK_START 0x10160 +#define CNXK_VF_SDP_R_OUT_ENABLE_START 0x10170 +#define CNXK_VF_SDP_R_OUT_PKT_CNT_START 0x10180 +#define CNXK_VF_SDP_R_OUT_BYTE_CNT_START 0x10190 + +#define CNXK_VF_SDP_R_OUT_CONTROL(ring) \ + (CNXK_VF_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_ENABLE(ring) \ + (CNXK_VF_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_SLIST_BADDR(ring) \ + (CNXK_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE(ring) \ + (CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_SLIST_DBELL(ring) \ + (CNXK_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_WMARK(ring) \ + (CNXK_VF_SDP_R_OUT_WMARK_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_CNTS(ring) \ + (CNXK_VF_SDP_R_OUT_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_INT_LEVELS(ring) \ + (CNXK_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_PKT_CNT(ring) \ + (CNXK_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_OUT_BYTE_CNT(ring) \ + (CNXK_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) + +/*------------------ R_OUT Masks ----------------*/ +#define CNXK_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) +#define CNXK_VF_R_OUT_INT_LEVELS_TIMET (32) + +#define CNXK_VF_R_OUT_CTL_IDLE BIT_ULL(40) +#define CNXK_VF_R_OUT_CTL_ES_I BIT_ULL(34) +#define CNXK_VF_R_OUT_CTL_NSR_I BIT_ULL(33) +#define CNXK_VF_R_OUT_CTL_ROR_I BIT_ULL(32) +#define CNXK_VF_R_OUT_CTL_ES_D BIT_ULL(30) +#define CNXK_VF_R_OUT_CTL_NSR_D BIT_ULL(29) +#define CNXK_VF_R_OUT_CTL_ROR_D BIT_ULL(28) +#define CNXK_VF_R_OUT_CTL_ES_P BIT_ULL(26) +#define CNXK_VF_R_OUT_CTL_NSR_P BIT_ULL(25) +#define CNXK_VF_R_OUT_CTL_ROR_P BIT_ULL(24) +#define CNXK_VF_R_OUT_CTL_IMODE BIT_ULL(23) + +/* ##################### Mail Box Registers ########################## */ +/* SDP PF to VF Mailbox Data Register */ +#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210 +/* SDP Packet PF to VF Mailbox Interrupt Register */ +#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220 +/* SDP VF to PF Mailbox Data Register */ +#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230 + +#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1) +#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0) + +#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA(ring) \ + (CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_MBOX_PF_VF_INT(ring) \ + (CNXK_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_VF_RING_OFFSET)) + +#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA(ring) \ + (CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET)) +#endif /* _OCTEP_VF_REGS_CNXK_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c new file mode 100644 index 000000000000..4f1a8157ce39 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include + +#include "octep_vf_config.h" +#include "octep_vf_main.h" + +/** + * octep_vf_setup_oqs() - setup resources for all Rx queues. + * + * @oct: Octeon device private data structure. + */ +int octep_vf_setup_oqs(struct octep_vf_device *oct) +{ + return -1; +} + +/** + * octep_vf_oq_dbell_init() - Initialize Rx queue doorbell. + * + * @oct: Octeon device private data structure. + * + * Write number of descriptors to Rx queue doorbell register. + */ +void octep_vf_oq_dbell_init(struct octep_vf_device *oct) +{ +} + +/** + * octep_vf_free_oqs() - Free resources of all Rx queues. + * + * @oct: Octeon device private data structure. + */ +void octep_vf_free_oqs(struct octep_vf_device *oct) +{ +} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h new file mode 100644 index 000000000000..fe46838b5200 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_VF_RX_H_ +#define _OCTEP_VF_RX_H_ + +/* struct octep_vf_oq_desc_hw - Octeon Hardware OQ descriptor format. + * + * The descriptor ring is made of descriptors which have 2 64-bit values: + * + * @buffer_ptr: DMA address of the skb->data + * @info_ptr: DMA address of host memory, used to update pkt count by hw. + * This is currently unused to save pci writes. + */ +struct octep_vf_oq_desc_hw { + dma_addr_t buffer_ptr; + u64 info_ptr; +}; + +static_assert(sizeof(struct octep_vf_oq_desc_hw) == 16); + +#define OCTEP_VF_OQ_DESC_SIZE (sizeof(struct octep_vf_oq_desc_hw)) + +/* Rx offload flags */ +#define OCTEP_VF_RX_OFFLOAD_VLAN_STRIP BIT(0) +#define OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM BIT(1) +#define OCTEP_VF_RX_OFFLOAD_UDP_CKSUM BIT(2) +#define OCTEP_VF_RX_OFFLOAD_TCP_CKSUM BIT(3) + +#define OCTEP_VF_RX_OFFLOAD_CKSUM (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \ + OCTEP_VF_RX_OFFLOAD_UDP_CKSUM | \ + OCTEP_VF_RX_OFFLOAD_TCP_CKSUM) + +#define OCTEP_VF_RX_IP_CSUM(flags) ((flags) & \ + (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \ + OCTEP_VF_RX_OFFLOAD_TCP_CKSUM | \ + OCTEP_VF_RX_OFFLOAD_UDP_CKSUM)) + +/* bit 0 is vlan strip */ +#define OCTEP_VF_RX_CSUM_IP_VERIFIED BIT(1) +#define OCTEP_VF_RX_CSUM_L4_VERIFIED BIT(2) + +#define OCTEP_VF_RX_CSUM_VERIFIED(flags) ((flags) & \ + (OCTEP_VF_RX_CSUM_L4_VERIFIED | \ + OCTEP_VF_RX_CSUM_IP_VERIFIED)) + +/* Extended Response Header in packet data received from Hardware. + * Includes metadata like checksum status. + * this is valid only if hardware/firmware published support for this. + * This is at offset 0 of packet data (skb->data). + */ +struct octep_vf_oq_resp_hw_ext { + /* Reserved. */ + u64 rsvd:48; + + /* rx offload flags */ + u16 rx_ol_flags; +}; + +static_assert(sizeof(struct octep_vf_oq_resp_hw_ext) == 8); + +#define OCTEP_VF_OQ_RESP_HW_EXT_SIZE (sizeof(struct octep_vf_oq_resp_hw_ext)) + +/* Length of Rx packet DMA'ed by Octeon to Host. + * this is in bigendian; so need to be converted to cpu endian. + * Octeon writes this at the beginning of Rx buffer (skb->data). + */ +struct octep_vf_oq_resp_hw { + /* The Length of the packet. */ + __be64 length; +}; + +static_assert(sizeof(struct octep_vf_oq_resp_hw) == 8); + +#define OCTEP_VF_OQ_RESP_HW_SIZE (sizeof(struct octep_vf_oq_resp_hw)) + +/* Pointer to data buffer. + * Driver keeps a pointer to the data buffer that it made available to + * the Octeon device. Since the descriptor ring keeps physical (bus) + * addresses, this field is required for the driver to keep track of + * the virtual address pointers. The fields are operated by + * OS-dependent routines. + */ +struct octep_vf_rx_buffer { + struct page *page; + + /* length from rx hardware descriptor after converting to cpu endian */ + u64 len; +}; + +#define OCTEP_VF_OQ_RECVBUF_SIZE (sizeof(struct octep_vf_rx_buffer)) + +/* Output Queue statistics. Each output queue has four stats fields. */ +struct octep_vf_oq_stats { + /* Number of packets received from the Device. */ + u64 packets; + + /* Number of bytes received from the Device. */ + u64 bytes; + + /* Number of times failed to allocate buffers. */ + u64 alloc_failures; +}; + +#define OCTEP_VF_OQ_STATS_SIZE (sizeof(struct octep_vf_oq_stats)) + +/* Hardware interface Rx statistics */ +struct octep_vf_iface_rx_stats { + /* Received packets */ + u64 pkts; + + /* Octets of received packets */ + u64 octets; + + /* Received PAUSE and Control packets */ + u64 pause_pkts; + + /* Received PAUSE and Control octets */ + u64 pause_octets; + + /* Filtered DMAC0 packets */ + u64 dmac0_pkts; + + /* Filtered DMAC0 octets */ + u64 dmac0_octets; + + /* Packets dropped due to RX FIFO full */ + u64 dropped_pkts_fifo_full; + + /* Octets dropped due to RX FIFO full */ + u64 dropped_octets_fifo_full; + + /* Error packets */ + u64 err_pkts; + + /* Filtered DMAC1 packets */ + u64 dmac1_pkts; + + /* Filtered DMAC1 octets */ + u64 dmac1_octets; + + /* NCSI-bound packets dropped */ + u64 ncsi_dropped_pkts; + + /* NCSI-bound octets dropped */ + u64 ncsi_dropped_octets; + + /* Multicast packets received. */ + u64 mcast_pkts; + + /* Broadcast packets received. */ + u64 bcast_pkts; + +}; + +/* The Descriptor Ring Output Queue structure. + * This structure has all the information required to implement a + * Octeon OQ. + */ +struct octep_vf_oq { + u32 q_no; + + struct octep_vf_device *octep_vf_dev; + struct net_device *netdev; + struct device *dev; + + struct napi_struct *napi; + + /* The receive buffer list. This list has the virtual addresses + * of the buffers. + */ + struct octep_vf_rx_buffer *buff_info; + + /* Pointer to the mapped packet credit register. + * Host writes number of info/buffer ptrs available to this register + */ + u8 __iomem *pkts_credit_reg; + + /* Pointer to the mapped packet sent register. + * Octeon writes the number of packets DMA'ed to host memory + * in this register. + */ + u8 __iomem *pkts_sent_reg; + + /* Statistics for this OQ. */ + struct octep_vf_oq_stats stats; + + /* Packets pending to be processed */ + u32 pkts_pending; + u32 last_pkt_count; + + /* Index in the ring where the driver should read the next packet */ + u32 host_read_idx; + + /* Number of descriptors in this ring. */ + u32 max_count; + u32 ring_size_mask; + + /* The number of descriptors pending refill. */ + u32 refill_count; + + /* Index in the ring where the driver will refill the + * descriptor's buffer + */ + u32 host_refill_idx; + u32 refill_threshold; + + /* The size of each buffer pointed by the buffer pointer. */ + u32 buffer_size; + u32 max_single_buffer_size; + + /* The 8B aligned descriptor ring starts at this address. */ + struct octep_vf_oq_desc_hw *desc_ring; + + /* DMA mapped address of the OQ descriptor ring. */ + dma_addr_t desc_ring_dma; +}; + +#define OCTEP_VF_OQ_SIZE (sizeof(struct octep_vf_oq)) +#endif /* _OCTEP_VF_RX_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c new file mode 100644 index 000000000000..232ba479ecf6 --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#include +#include + +#include "octep_vf_config.h" +#include "octep_vf_main.h" + +/** + * octep_vf_clean_iqs() - Clean Tx queues to shutdown the device. + * + * @oct: Octeon device private data structure. + * + * Free the buffers in Tx queue descriptors pending completion and + * reset queue indices + */ +void octep_vf_clean_iqs(struct octep_vf_device *oct) +{ +} + +/** + * octep_vf_setup_iqs() - setup resources for all Tx queues. + * + * @oct: Octeon device private data structure. + */ +int octep_vf_setup_iqs(struct octep_vf_device *oct) +{ + return -1; +} + +/** + * octep_vf_free_iqs() - Free resources of all Tx queues. + * + * @oct: Octeon device private data structure. + */ +void octep_vf_free_iqs(struct octep_vf_device *oct) +{ +} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h new file mode 100644 index 000000000000..f338b975103c --- /dev/null +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Marvell Octeon EP (EndPoint) VF Ethernet Driver + * + * Copyright (C) 2020 Marvell. + * + */ + +#ifndef _OCTEP_VF_TX_H_ +#define _OCTEP_VF_TX_H_ + +#define IQ_SEND_OK 0 +#define IQ_SEND_STOP 1 +#define IQ_SEND_FAILED -1 + +#define TX_BUFTYPE_NONE 0 +#define TX_BUFTYPE_NET 1 +#define TX_BUFTYPE_NET_SG 2 +#define NUM_TX_BUFTYPES 3 + +/* Hardware format for Scatter/Gather list + * + * 63 48|47 32|31 16|15 0 + * ----------------------------------------- + * | Len 0 | Len 1 | Len 2 | Len 3 | + * ----------------------------------------- + * | Ptr 0 | + * ----------------------------------------- + * | Ptr 1 | + * ----------------------------------------- + * | Ptr 2 | + * ----------------------------------------- + * | Ptr 3 | + * ----------------------------------------- + */ +struct octep_vf_tx_sglist_desc { + u16 len[4]; + dma_addr_t dma_ptr[4]; +}; + +static_assert(sizeof(struct octep_vf_tx_sglist_desc) == 40); + +/* Each Scatter/Gather entry sent to hardwar hold four pointers. + * So, number of entries required is (MAX_SKB_FRAGS + 1)/4, where '+1' + * is for main skb which also goes as a gather buffer to Octeon hardware. + * To allocate sufficient SGLIST entries for a packet with max fragments, + * align by adding 3 before calcuating max SGLIST entries per packet. + */ +#define OCTEP_VF_SGLIST_ENTRIES_PER_PKT ((MAX_SKB_FRAGS + 1 + 3) / 4) +#define OCTEP_VF_SGLIST_SIZE_PER_PKT \ + (OCTEP_VF_SGLIST_ENTRIES_PER_PKT * sizeof(struct octep_vf_tx_sglist_desc)) + +struct octep_vf_tx_buffer { + struct sk_buff *skb; + dma_addr_t dma; + struct octep_vf_tx_sglist_desc *sglist; + dma_addr_t sglist_dma; + u8 gather; +}; + +#define OCTEP_VF_IQ_TXBUFF_INFO_SIZE (sizeof(struct octep_vf_tx_buffer)) + +/* VF Hardware interface Tx statistics */ +struct octep_vf_iface_tx_stats { + /* Total frames sent on the interface */ + u64 pkts; + + /* Total octets sent on the interface */ + u64 octs; + + /* Packets sent to a broadcast DMAC */ + u64 bcst; + + /* Packets sent to the multicast DMAC */ + u64 mcst; + + /* Packets dropped */ + u64 dropped; + + /* Reserved */ + u64 reserved[13]; +}; + +/* VF Input Queue statistics */ +struct octep_vf_iq_stats { + /* Instructions posted to this queue. */ + u64 instr_posted; + + /* Instructions copied by hardware for processing. */ + u64 instr_completed; + + /* Instructions that could not be processed. */ + u64 instr_dropped; + + /* Bytes sent through this queue. */ + u64 bytes_sent; + + /* Gather entries sent through this queue. */ + u64 sgentry_sent; + + /* Number of transmit failures due to TX_BUSY */ + u64 tx_busy; + + /* Number of times the queue is restarted */ + u64 restart_cnt; +}; + +/* The instruction (input) queue. + * The input queue is used to post raw (instruction) mode data or packet + * data to Octeon device from the host. Each input queue (up to 4) for + * a Octeon device has one such structure to represent it. + */ +struct octep_vf_iq { + u32 q_no; + + struct octep_vf_device *octep_vf_dev; + struct net_device *netdev; + struct device *dev; + struct netdev_queue *netdev_q; + + /* Index in input ring where driver should write the next packet */ + u16 host_write_index; + + /* Index in input ring where Octeon is expected to read next packet */ + u16 octep_vf_read_index; + + /* This index aids in finding the window in the queue where Octeon + * has read the commands. + */ + u16 flush_index; + + /* Statistics for this input queue. */ + struct octep_vf_iq_stats stats; + + /* Pointer to the Virtual Base addr of the input ring. */ + struct octep_vf_tx_desc_hw *desc_ring; + + /* DMA mapped base address of the input descriptor ring. */ + dma_addr_t desc_ring_dma; + + /* Info of Tx buffers pending completion. */ + struct octep_vf_tx_buffer *buff_info; + + /* Base pointer to Scatter/Gather lists for all ring descriptors. */ + struct octep_vf_tx_sglist_desc *sglist; + + /* DMA mapped addr of Scatter Gather Lists */ + dma_addr_t sglist_dma; + + /* Octeon doorbell register for the ring. */ + u8 __iomem *doorbell_reg; + + /* Octeon instruction count register for this ring. */ + u8 __iomem *inst_cnt_reg; + + /* interrupt level register for this ring */ + u8 __iomem *intr_lvl_reg; + + /* Maximum no. of instructions in this queue. */ + u32 max_count; + u32 ring_size_mask; + + u32 pkt_in_done; + u32 pkts_processed; + + u32 status; + + /* Number of instructions pending to be posted to Octeon. */ + u32 fill_cnt; + + /* The max. number of instructions that can be held pending by the + * driver before ringing doorbell. + */ + u32 fill_threshold; +}; + +/* Hardware Tx Instruction Header */ +struct octep_vf_instr_hdr { + /* Data Len */ + u64 tlen:16; + + /* Reserved */ + u64 rsvd:20; + + /* PKIND for SDP */ + u64 pkind:6; + + /* Front Data size */ + u64 fsz:6; + + /* No. of entries in gather list */ + u64 gsz:14; + + /* Gather indicator 1=gather*/ + u64 gather:1; + + /* Reserved3 */ + u64 reserved3:1; +}; + +static_assert(sizeof(struct octep_vf_instr_hdr) == 8); + +/* Tx offload flags */ +#define OCTEP_VF_TX_OFFLOAD_VLAN_INSERT BIT(0) +#define OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM BIT(1) +#define OCTEP_VF_TX_OFFLOAD_UDP_CKSUM BIT(2) +#define OCTEP_VF_TX_OFFLOAD_TCP_CKSUM BIT(3) +#define OCTEP_VF_TX_OFFLOAD_SCTP_CKSUM BIT(4) +#define OCTEP_VF_TX_OFFLOAD_TCP_TSO BIT(5) +#define OCTEP_VF_TX_OFFLOAD_UDP_TSO BIT(6) + +#define OCTEP_VF_TX_OFFLOAD_CKSUM (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \ + OCTEP_VF_TX_OFFLOAD_UDP_CKSUM | \ + OCTEP_VF_TX_OFFLOAD_TCP_CKSUM) + +#define OCTEP_VF_TX_OFFLOAD_TSO (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \ + OCTEP_VF_TX_OFFLOAD_UDP_TSO) + +#define OCTEP_VF_TX_IP_CSUM(flags) ((flags) & \ + (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \ + OCTEP_VF_TX_OFFLOAD_TCP_CKSUM | \ + OCTEP_VF_TX_OFFLOAD_UDP_CKSUM)) + +#define OCTEP_VF_TX_TSO(flags) ((flags) & \ + (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \ + OCTEP_VF_TX_OFFLOAD_UDP_TSO)) + +struct tx_mdata { + /* offload flags */ + u16 ol_flags; + + /* gso size */ + u16 gso_size; + + /* gso flags */ + u16 gso_segs; + + /* reserved */ + u16 rsvd1; + + /* reserved */ + u64 rsvd2; +}; + +static_assert(sizeof(struct tx_mdata) == 16); + +/* 64-byte Tx instruction format. + * Format of instruction for a 64-byte mode input queue. + * + * only first 16-bytes (dptr and ih) are mandatory; rest are optional + * and filled by the driver based on firmware/hardware capabilities. + * These optional headers together called Front Data and its size is + * described by ih->fsz. + */ +struct octep_vf_tx_desc_hw { + /* Pointer where the input data is available. */ + u64 dptr; + + /* Instruction Header. */ + union { + struct octep_vf_instr_hdr ih; + u64 ih64; + }; + + union { + u64 txm64[2]; + struct tx_mdata txm; + }; + + /* Additional headers available in a 64-byte instruction. */ + u64 exhdr[4]; +}; + +static_assert(sizeof(struct octep_vf_tx_desc_hw) == 64); + +#define OCTEP_VF_IQ_DESC_SIZE (sizeof(struct octep_vf_tx_desc_hw)) +#endif /* _OCTEP_VF_TX_H_ */ -- cgit v1.3.1 From 060baa9b90d4e14eac7123abc563070dd30b21a2 Mon Sep 17 00:00:00 2001 From: Claudiu Beznea Date: Thu, 7 Dec 2023 09:06:57 +0200 Subject: dt-bindings: net: renesas,etheravb: Document RZ/G3S support Document Ethernet RZ/G3S support. Ethernet IP is similar to the one available on RZ/G2L devices. Signed-off-by: Claudiu Beznea Acked-by: Conor Dooley Reviewed-by: Sergey Shtylyov Reviewed-by: Geert Uytterhoeven Signed-off-by: Jakub Kicinski --- Documentation/devicetree/bindings/net/renesas,etheravb.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml index d3306b186000..890f7858d0dc 100644 --- a/Documentation/devicetree/bindings/net/renesas,etheravb.yaml +++ b/Documentation/devicetree/bindings/net/renesas,etheravb.yaml @@ -58,6 +58,7 @@ properties: - renesas,r9a07g043-gbeth # RZ/G2UL and RZ/Five - renesas,r9a07g044-gbeth # RZ/G2{L,LC} - renesas,r9a07g054-gbeth # RZ/V2L + - renesas,r9a08g045-gbeth # RZ/G3S - const: renesas,rzg2l-gbeth # RZ/{G2L,G2UL,V2L} family reg: true -- cgit v1.3.1 From 292010ee509443a1383bb079a4fa80515aa89a7f Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 20 Dec 2023 16:19:47 +0100 Subject: kunit: add parameter generation macro using description from array The existing KUNIT_ARRAY_PARAM macro requires a separate function to get the description. However, in a lot of cases the description can just be copied directly from the array. Add a second macro that avoids having to write a static function just for a single strscpy. Signed-off-by: Benjamin Berg Reviewed-by: David Gow Link: https://msgid.link/20231220151952.415232-2-benjamin@sipsolutions.net Signed-off-by: Johannes Berg --- Documentation/dev-tools/kunit/usage.rst | 12 ++++-------- include/kunit/test.h | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 8 deletions(-) (limited to 'Documentation') diff --git a/Documentation/dev-tools/kunit/usage.rst b/Documentation/dev-tools/kunit/usage.rst index c27e1646ecd9..b959e5befcbe 100644 --- a/Documentation/dev-tools/kunit/usage.rst +++ b/Documentation/dev-tools/kunit/usage.rst @@ -566,13 +566,9 @@ By reusing the same ``cases`` array from above, we can write the test as a }, }; - // Need a helper function to generate a name for each test case. - static void case_to_desc(const struct sha1_test_case *t, char *desc) - { - strcpy(desc, t->str); - } - // Creates `sha1_gen_params()` to iterate over `cases`. - KUNIT_ARRAY_PARAM(sha1, cases, case_to_desc); + // Creates `sha1_gen_params()` to iterate over `cases` while using + // the struct member `str` for the case description. + KUNIT_ARRAY_PARAM_DESC(sha1, cases, str); // Looks no different from a normal test. static void sha1_test(struct kunit *test) @@ -588,7 +584,7 @@ By reusing the same ``cases`` array from above, we can write the test as a } // Instead of KUNIT_CASE, we use KUNIT_CASE_PARAM and pass in the - // function declared by KUNIT_ARRAY_PARAM. + // function declared by KUNIT_ARRAY_PARAM or KUNIT_ARRAY_PARAM_DESC. static struct kunit_case sha1_test_cases[] = { KUNIT_CASE_PARAM(sha1_test, sha1_gen_params), {} diff --git a/include/kunit/test.h b/include/kunit/test.h index 20ed9f9275c9..2dfa851e1f88 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -1514,6 +1514,25 @@ do { \ return NULL; \ } +/** + * KUNIT_ARRAY_PARAM_DESC() - Define test parameter generator from an array. + * @name: prefix for the test parameter generator function. + * @array: array of test parameters. + * @desc_member: structure member from array element to use as description + * + * Define function @name_gen_params which uses @array to generate parameters. + */ +#define KUNIT_ARRAY_PARAM_DESC(name, array, desc_member) \ + static const void *name##_gen_params(const void *prev, char *desc) \ + { \ + typeof((array)[0]) *__next = prev ? ((typeof(__next)) prev) + 1 : (array); \ + if (__next - (array) < ARRAY_SIZE((array))) { \ + strscpy(desc, __next->desc_member, KUNIT_PARAM_DESC_SIZE); \ + return __next; \ + } \ + return NULL; \ + } + // TODO(dlatypov@google.com): consider eventually migrating users to explicitly // include resource.h themselves if they need it. #include -- cgit v1.3.1 From d000574d02870710c62751148cbfe22993222b98 Mon Sep 17 00:00:00 2001 From: David Arinzon Date: Mon, 1 Jan 2024 19:08:45 +0000 Subject: net: ena: Move XDP code to its new files XDP system has a very large footprint in the driver's overall code. makes the whole driver's code much harder to read. Moving XDP code to dedicated files. This patch doesn't make any changes to the code itself and only cut-pastes the code into ena_xdp.c and ena_xdp.h files so the change is purely cosmetic. Signed-off-by: Shay Agroskin Signed-off-by: David Arinzon Link: https://lore.kernel.org/r/20240101190855.18739-2-darinzon@amazon.com Signed-off-by: Jakub Kicinski --- .../device_drivers/ethernet/amazon/ena.rst | 1 + drivers/net/ethernet/amazon/ena/Makefile | 2 +- drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 + drivers/net/ethernet/amazon/ena/ena_netdev.c | 634 +-------------------- drivers/net/ethernet/amazon/ena/ena_netdev.h | 82 ++- drivers/net/ethernet/amazon/ena/ena_xdp.c | 465 +++++++++++++++ drivers/net/ethernet/amazon/ena/ena_xdp.h | 152 +++++ 7 files changed, 680 insertions(+), 657 deletions(-) create mode 100644 drivers/net/ethernet/amazon/ena/ena_xdp.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_xdp.h (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst index 5eaa3ab6c73e..b842bcb14255 100644 --- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -54,6 +54,7 @@ ena_common_defs.h Common definitions for ena_com layer. ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers. ena_netdev.[ch] Main Linux kernel driver. ena_ethtool.c ethtool callbacks. +ena_xdp.[ch] XDP files ena_pci_id_tbl.h Supported device IDs. ================= ====================================================== diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile index f1f752a8f7bb..6ab615365172 100644 --- a/drivers/net/ethernet/amazon/ena/Makefile +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_ENA_ETHERNET) += ena.o -ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o ena_xdp.o diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index 2d8c5a2841b8..724af7dfefae 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -7,6 +7,7 @@ #include #include "ena_netdev.h" +#include "ena_xdp.h" struct ena_stats { char name[ETH_GSTRING_LEN]; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c index afd1b7ce0013..0a0d97c4044c 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -19,8 +19,8 @@ #include #include "ena_netdev.h" -#include #include "ena_pci_id_tbl.h" +#include "ena_xdp.h" MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); MODULE_DESCRIPTION(DEVICE_NAME); @@ -45,53 +45,6 @@ static void check_for_admin_com_state(struct ena_adapter *adapter); static void ena_destroy_device(struct ena_adapter *adapter, bool graceful); static int ena_restore_device(struct ena_adapter *adapter); -static void ena_init_io_rings(struct ena_adapter *adapter, - int first_index, int count); -static void ena_init_napi_in_range(struct ena_adapter *adapter, int first_index, - int count); -static void ena_del_napi_in_range(struct ena_adapter *adapter, int first_index, - int count); -static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid); -static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, - int count); -static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid); -static void ena_free_tx_resources(struct ena_adapter *adapter, int qid); -static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget); -static void ena_destroy_all_tx_queues(struct ena_adapter *adapter); -static void ena_free_all_io_tx_resources(struct ena_adapter *adapter); -static void ena_napi_disable_in_range(struct ena_adapter *adapter, - int first_index, int count); -static void ena_napi_enable_in_range(struct ena_adapter *adapter, - int first_index, int count); -static int ena_up(struct ena_adapter *adapter); -static void ena_down(struct ena_adapter *adapter); -static void ena_unmask_interrupt(struct ena_ring *tx_ring, - struct ena_ring *rx_ring); -static void ena_update_ring_numa_node(struct ena_ring *tx_ring, - struct ena_ring *rx_ring); -static void ena_unmap_tx_buff(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info); -static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, - int first_index, int count); -static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, int count); - -/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ -static void ena_increase_stat(u64 *statp, u64 cnt, - struct u64_stats_sync *syncp) -{ - u64_stats_update_begin(syncp); - (*statp) += cnt; - u64_stats_update_end(syncp); -} - -static void ena_ring_tx_doorbell(struct ena_ring *tx_ring) -{ - ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); - ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); -} - static void ena_tx_timeout(struct net_device *dev, unsigned int txqueue) { struct ena_adapter *adapter = netdev_priv(dev); @@ -135,12 +88,12 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu) return ret; } -static int ena_xmit_common(struct net_device *dev, - struct ena_ring *ring, - struct ena_tx_buffer *tx_info, - struct ena_com_tx_ctx *ena_tx_ctx, - u16 next_to_use, - u32 bytes) +int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes) { struct ena_adapter *adapter = netdev_priv(dev); int rc, nb_hw_desc; @@ -186,467 +139,6 @@ static int ena_xmit_common(struct net_device *dev, return 0; } -/* This is the XDP napi callback. XDP queues use a separate napi callback - * than Rx/Tx queues. - */ -static int ena_xdp_io_poll(struct napi_struct *napi, int budget) -{ - struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); - u32 xdp_work_done, xdp_budget; - struct ena_ring *xdp_ring; - int napi_comp_call = 0; - int ret; - - xdp_ring = ena_napi->xdp_ring; - - xdp_budget = budget; - - if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || - test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { - napi_complete_done(napi, 0); - return 0; - } - - xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); - - /* If the device is about to reset or down, avoid unmask - * the interrupt and return 0 so NAPI won't reschedule - */ - if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { - napi_complete_done(napi, 0); - ret = 0; - } else if (xdp_budget > xdp_work_done) { - napi_comp_call = 1; - if (napi_complete_done(napi, xdp_work_done)) - ena_unmask_interrupt(xdp_ring, NULL); - ena_update_ring_numa_node(xdp_ring, NULL); - ret = xdp_work_done; - } else { - ret = xdp_budget; - } - - u64_stats_update_begin(&xdp_ring->syncp); - xdp_ring->tx_stats.napi_comp += napi_comp_call; - xdp_ring->tx_stats.tx_poll++; - u64_stats_update_end(&xdp_ring->syncp); - xdp_ring->tx_stats.last_napi_jiffies = jiffies; - - return ret; -} - -static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, - struct ena_tx_buffer *tx_info, - struct xdp_frame *xdpf, - struct ena_com_tx_ctx *ena_tx_ctx) -{ - struct ena_adapter *adapter = xdp_ring->adapter; - struct ena_com_buf *ena_buf; - int push_len = 0; - dma_addr_t dma; - void *data; - u32 size; - - tx_info->xdpf = xdpf; - data = tx_info->xdpf->data; - size = tx_info->xdpf->len; - - if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { - /* Designate part of the packet for LLQ */ - push_len = min_t(u32, size, xdp_ring->tx_max_header_size); - - ena_tx_ctx->push_header = data; - - size -= push_len; - data += push_len; - } - - ena_tx_ctx->header_len = push_len; - - if (size > 0) { - dma = dma_map_single(xdp_ring->dev, - data, - size, - DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) - goto error_report_dma_error; - - tx_info->map_linear_data = 0; - - ena_buf = tx_info->bufs; - ena_buf->paddr = dma; - ena_buf->len = size; - - ena_tx_ctx->ena_bufs = ena_buf; - ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; - } - - return 0; - -error_report_dma_error: - ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1, - &xdp_ring->syncp); - netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); - - return -EINVAL; -} - -static int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, - struct net_device *dev, - struct xdp_frame *xdpf, - int flags) -{ - struct ena_com_tx_ctx ena_tx_ctx = {}; - struct ena_tx_buffer *tx_info; - u16 next_to_use, req_id; - int rc; - - next_to_use = xdp_ring->next_to_use; - req_id = xdp_ring->free_ids[next_to_use]; - tx_info = &xdp_ring->tx_buffer_info[req_id]; - tx_info->num_of_bufs = 0; - - rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); - if (unlikely(rc)) - return rc; - - ena_tx_ctx.req_id = req_id; - - rc = ena_xmit_common(dev, - xdp_ring, - tx_info, - &ena_tx_ctx, - next_to_use, - xdpf->len); - if (rc) - goto error_unmap_dma; - - /* trigger the dma engine. ena_ring_tx_doorbell() - * calls a memory barrier inside it. - */ - if (flags & XDP_XMIT_FLUSH) - ena_ring_tx_doorbell(xdp_ring); - - return rc; - -error_unmap_dma: - ena_unmap_tx_buff(xdp_ring, tx_info); - tx_info->xdpf = NULL; - return rc; -} - -static int ena_xdp_xmit(struct net_device *dev, int n, - struct xdp_frame **frames, u32 flags) -{ - struct ena_adapter *adapter = netdev_priv(dev); - struct ena_ring *xdp_ring; - int qid, i, nxmit = 0; - - if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) - return -EINVAL; - - if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) - return -ENETDOWN; - - /* We assume that all rings have the same XDP program */ - if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog)) - return -ENXIO; - - qid = smp_processor_id() % adapter->xdp_num_queues; - qid += adapter->xdp_first_ring; - xdp_ring = &adapter->tx_ring[qid]; - - /* Other CPU ids might try to send thorugh this queue */ - spin_lock(&xdp_ring->xdp_tx_lock); - - for (i = 0; i < n; i++) { - if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0)) - break; - nxmit++; - } - - /* Ring doorbell to make device aware of the packets */ - if (flags & XDP_XMIT_FLUSH) - ena_ring_tx_doorbell(xdp_ring); - - spin_unlock(&xdp_ring->xdp_tx_lock); - - /* Return number of packets sent */ - return nxmit; -} - -static int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) -{ - u32 verdict = ENA_XDP_PASS; - struct bpf_prog *xdp_prog; - struct ena_ring *xdp_ring; - struct xdp_frame *xdpf; - u64 *xdp_stat; - - xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); - - if (!xdp_prog) - goto out; - - verdict = bpf_prog_run_xdp(xdp_prog, xdp); - - switch (verdict) { - case XDP_TX: - xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) { - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - } - - /* Find xmit queue */ - xdp_ring = rx_ring->xdp_ring; - - /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ - spin_lock(&xdp_ring->xdp_tx_lock); - - if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, - XDP_XMIT_FLUSH)) - xdp_return_frame(xdpf); - - spin_unlock(&xdp_ring->xdp_tx_lock); - xdp_stat = &rx_ring->rx_stats.xdp_tx; - verdict = ENA_XDP_TX; - break; - case XDP_REDIRECT: - if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { - xdp_stat = &rx_ring->rx_stats.xdp_redirect; - verdict = ENA_XDP_REDIRECT; - break; - } - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - case XDP_ABORTED: - trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_aborted; - verdict = ENA_XDP_DROP; - break; - case XDP_DROP: - xdp_stat = &rx_ring->rx_stats.xdp_drop; - verdict = ENA_XDP_DROP; - break; - case XDP_PASS: - xdp_stat = &rx_ring->rx_stats.xdp_pass; - verdict = ENA_XDP_PASS; - break; - default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); - xdp_stat = &rx_ring->rx_stats.xdp_invalid; - verdict = ENA_XDP_DROP; - } - - ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); -out: - return verdict; -} - -static void ena_init_all_xdp_queues(struct ena_adapter *adapter) -{ - adapter->xdp_first_ring = adapter->num_io_queues; - adapter->xdp_num_queues = adapter->num_io_queues; - - ena_init_io_rings(adapter, - adapter->xdp_first_ring, - adapter->xdp_num_queues); -} - -static int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) -{ - u32 xdp_first_ring = adapter->xdp_first_ring; - u32 xdp_num_queues = adapter->xdp_num_queues; - int rc = 0; - - rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); - if (rc) - goto setup_err; - - rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); - if (rc) - goto create_err; - - return 0; - -create_err: - ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); -setup_err: - return rc; -} - -/* Provides a way for both kernel and bpf-prog to know - * more about the RX-queue a given XDP frame arrived on. - */ -static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) -{ - int rc; - - rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0); - - if (rc) { - netif_err(rx_ring->adapter, ifup, rx_ring->netdev, - "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", - rx_ring->qid, rc); - goto err; - } - - rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, - NULL); - - if (rc) { - netif_err(rx_ring->adapter, ifup, rx_ring->netdev, - "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", - rx_ring->qid, rc); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - } - -err: - return rc; -} - -static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) -{ - xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); -} - -static void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, - struct bpf_prog *prog, - int first, int count) -{ - struct bpf_prog *old_bpf_prog; - struct ena_ring *rx_ring; - int i = 0; - - for (i = first; i < count; i++) { - rx_ring = &adapter->rx_ring[i]; - old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); - - if (!old_bpf_prog && prog) { - ena_xdp_register_rxq_info(rx_ring); - rx_ring->rx_headroom = XDP_PACKET_HEADROOM; - } else if (old_bpf_prog && !prog) { - ena_xdp_unregister_rxq_info(rx_ring); - rx_ring->rx_headroom = NET_SKB_PAD; - } - } -} - -static void ena_xdp_exchange_program(struct ena_adapter *adapter, - struct bpf_prog *prog) -{ - struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); - - ena_xdp_exchange_program_rx_in_range(adapter, - prog, - 0, - adapter->num_io_queues); - - if (old_bpf_prog) - bpf_prog_put(old_bpf_prog); -} - -static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) -{ - bool was_up; - int rc; - - was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); - - if (was_up) - ena_down(adapter); - - adapter->xdp_first_ring = 0; - adapter->xdp_num_queues = 0; - ena_xdp_exchange_program(adapter, NULL); - if (was_up) { - rc = ena_up(adapter); - if (rc) - return rc; - } - return 0; -} - -static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) -{ - struct ena_adapter *adapter = netdev_priv(netdev); - struct bpf_prog *prog = bpf->prog; - struct bpf_prog *old_bpf_prog; - int rc, prev_mtu; - bool is_up; - - is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); - rc = ena_xdp_allowed(adapter); - if (rc == ENA_XDP_ALLOWED) { - old_bpf_prog = adapter->xdp_bpf_prog; - if (prog) { - if (!is_up) { - ena_init_all_xdp_queues(adapter); - } else if (!old_bpf_prog) { - ena_down(adapter); - ena_init_all_xdp_queues(adapter); - } - ena_xdp_exchange_program(adapter, prog); - - if (is_up && !old_bpf_prog) { - rc = ena_up(adapter); - if (rc) - return rc; - } - xdp_features_set_redirect_target(netdev, false); - } else if (old_bpf_prog) { - xdp_features_clear_redirect_target(netdev); - rc = ena_destroy_and_free_all_xdp_queues(adapter); - if (rc) - return rc; - } - - prev_mtu = netdev->max_mtu; - netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; - - if (!old_bpf_prog) - netif_info(adapter, drv, adapter->netdev, - "XDP program is set, changing the max_mtu from %d to %d", - prev_mtu, netdev->max_mtu); - - } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { - netif_err(adapter, drv, adapter->netdev, - "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", - netdev->mtu, ENA_XDP_MAX_MTU); - NL_SET_ERR_MSG_MOD(bpf->extack, - "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); - return -EINVAL; - } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { - netif_err(adapter, drv, adapter->netdev, - "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", - adapter->num_io_queues, adapter->max_num_io_queues); - NL_SET_ERR_MSG_MOD(bpf->extack, - "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); - return -EINVAL; - } - - return 0; -} - -/* This is the main xdp callback, it's used by the kernel to set/unset the xdp - * program as well as to query the current xdp program id. - */ -static int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) -{ - switch (bpf->command) { - case XDP_SETUP_PROG: - return ena_xdp_set(netdev, bpf); - default: - return -EINVAL; - } - return 0; -} - static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) { #ifdef CONFIG_RFS_ACCEL @@ -688,8 +180,8 @@ static void ena_init_io_rings_common(struct ena_adapter *adapter, u64_stats_init(&ring->syncp); } -static void ena_init_io_rings(struct ena_adapter *adapter, - int first_index, int count) +void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev; struct ena_ring *txr, *rxr; @@ -820,9 +312,8 @@ static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) tx_ring->push_buf_intermediate_buf = NULL; } -static int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, - int count) +int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) { int i, rc = 0; @@ -845,8 +336,8 @@ err_setup_tx: return rc; } -static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, - int first_index, int count) +void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count) { int i; @@ -859,7 +350,7 @@ static void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, * * Free all transmit software resources */ -static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +void ena_free_all_io_tx_resources(struct ena_adapter *adapter) { ena_free_all_io_tx_resources_in_range(adapter, 0, @@ -1169,8 +660,8 @@ static void ena_free_all_rx_bufs(struct ena_adapter *adapter) ena_free_rx_bufs(adapter, i); } -static void ena_unmap_tx_buff(struct ena_ring *tx_ring, - struct ena_tx_buffer *tx_info) +void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info) { struct ena_com_buf *ena_buf; u32 cnt; @@ -1272,8 +763,8 @@ static void ena_destroy_all_io_queues(struct ena_adapter *adapter) ena_destroy_all_rx_queues(adapter); } -static int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, - struct ena_tx_buffer *tx_info, bool is_xdp) +int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp) { if (tx_info) netif_err(ring->adapter, @@ -1305,17 +796,6 @@ static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) return handle_invalid_req_id(tx_ring, req_id, tx_info, false); } -static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) -{ - struct ena_tx_buffer *tx_info; - - tx_info = &xdp_ring->tx_buffer_info[req_id]; - if (likely(tx_info->xdpf)) - return 0; - - return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); -} - static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) { struct netdev_queue *txq; @@ -1688,6 +1168,7 @@ static int ena_xdp_handle_buff(struct ena_ring *rx_ring, struct xdp_buff *xdp, u return ret; } + /* ena_clean_rx_irq - Cleanup RX irq * @rx_ring: RX ring to clean * @napi: napi handler @@ -1880,8 +1361,8 @@ static void ena_adjust_adaptive_rx_intr_moderation(struct ena_napi *ena_napi) rx_ring->per_napi_packets = 0; } -static void ena_unmask_interrupt(struct ena_ring *tx_ring, - struct ena_ring *rx_ring) +void ena_unmask_interrupt(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) { u32 rx_interval = tx_ring->smoothed_interval; struct ena_eth_io_intr_reg intr_reg; @@ -1913,8 +1394,8 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring, ena_com_unmask_intr(tx_ring->ena_com_io_cq, &intr_reg); } -static void ena_update_ring_numa_node(struct ena_ring *tx_ring, - struct ena_ring *rx_ring) +void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) { int cpu = get_cpu(); int numa_node; @@ -1949,67 +1430,6 @@ out: put_cpu(); } -static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) -{ - u32 total_done = 0; - u16 next_to_clean; - int tx_pkts = 0; - u16 req_id; - int rc; - - if (unlikely(!xdp_ring)) - return 0; - next_to_clean = xdp_ring->next_to_clean; - - while (tx_pkts < budget) { - struct ena_tx_buffer *tx_info; - struct xdp_frame *xdpf; - - rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, - &req_id); - if (rc) { - if (unlikely(rc == -EINVAL)) - handle_invalid_req_id(xdp_ring, req_id, NULL, - true); - break; - } - - /* validate that the request id points to a valid xdp_frame */ - rc = validate_xdp_req_id(xdp_ring, req_id); - if (rc) - break; - - tx_info = &xdp_ring->tx_buffer_info[req_id]; - xdpf = tx_info->xdpf; - - tx_info->xdpf = NULL; - tx_info->last_jiffies = 0; - ena_unmap_tx_buff(xdp_ring, tx_info); - - netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, - "tx_poll: q %d skb %p completed\n", xdp_ring->qid, - xdpf); - - tx_pkts++; - total_done += tx_info->tx_descs; - - xdp_return_frame(xdpf); - xdp_ring->free_ids[next_to_clean] = req_id; - next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, - xdp_ring->ring_size); - } - - xdp_ring->next_to_clean = next_to_clean; - ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); - ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); - - netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, - "tx_poll: q %d done. total pkts: %d\n", - xdp_ring->qid, tx_pkts); - - return tx_pkts; -} - static int ena_io_poll(struct napi_struct *napi, int budget) { struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); @@ -2475,8 +1895,8 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) return rc; } -static int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, - int first_index, int count) +int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count) { struct ena_com_dev *ena_dev = adapter->ena_dev; int rc, i; @@ -2686,7 +2106,7 @@ err_setup_tx: } } -static int ena_up(struct ena_adapter *adapter) +int ena_up(struct ena_adapter *adapter) { int io_queue_count, rc, i; @@ -2748,7 +2168,7 @@ err_req_irq: return rc; } -static void ena_down(struct ena_adapter *adapter) +void ena_down(struct ena_adapter *adapter) { int io_queue_count = adapter->num_io_queues + adapter->xdp_num_queues; diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h index 33c923e1261a..041f08d20b45 100644 --- a/drivers/net/ethernet/amazon/ena/ena_netdev.h +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -110,19 +110,6 @@ #define ENA_MMIO_DISABLE_REG_READ BIT(0) -/* The max MTU size is configured to be the ethernet frame size without - * the overhead of the ethernet header, which can have a VLAN header, and - * a frame check sequence (FCS). - * The buffer size we share with the device is defined to be ENA_PAGE_SIZE - */ - -#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ - VLAN_HLEN - XDP_PACKET_HEADROOM - \ - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) - -#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ - ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) - struct ena_irq { irq_handler_t handler; void *data; @@ -421,47 +408,44 @@ static inline void ena_reset_device(struct ena_adapter *adapter, set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); } -enum ena_xdp_errors_t { - ENA_XDP_ALLOWED = 0, - ENA_XDP_CURRENT_MTU_TOO_LARGE, - ENA_XDP_NO_ENOUGH_QUEUES, -}; - -enum ENA_XDP_ACTIONS { - ENA_XDP_PASS = 0, - ENA_XDP_TX = BIT(0), - ENA_XDP_REDIRECT = BIT(1), - ENA_XDP_DROP = BIT(2) -}; - -#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) - -static inline bool ena_xdp_present(struct ena_adapter *adapter) -{ - return !!adapter->xdp_bpf_prog; -} - -static inline bool ena_xdp_present_ring(struct ena_ring *ring) -{ - return !!ring->xdp_bpf_prog; -} +int handle_invalid_req_id(struct ena_ring *ring, u16 req_id, + struct ena_tx_buffer *tx_info, bool is_xdp); -static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter, - u32 queues) +/* Increase a stat by cnt while holding syncp seqlock on 32bit machines */ +static inline void ena_increase_stat(u64 *statp, u64 cnt, + struct u64_stats_sync *syncp) { - return 2 * queues <= adapter->max_num_io_queues; + u64_stats_update_begin(syncp); + (*statp) += cnt; + u64_stats_update_end(syncp); } -static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +static inline void ena_ring_tx_doorbell(struct ena_ring *tx_ring) { - enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; - - if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) - rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; - else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) - rc = ENA_XDP_NO_ENOUGH_QUEUES; - - return rc; + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + ena_increase_stat(&tx_ring->tx_stats.doorbells, 1, &tx_ring->syncp); } +int ena_xmit_common(struct net_device *dev, + struct ena_ring *ring, + struct ena_tx_buffer *tx_info, + struct ena_com_tx_ctx *ena_tx_ctx, + u16 next_to_use, + u32 bytes); +void ena_unmap_tx_buff(struct ena_ring *tx_ring, + struct ena_tx_buffer *tx_info); +void ena_init_io_rings(struct ena_adapter *adapter, + int first_index, int count); +int ena_create_io_tx_queues_in_range(struct ena_adapter *adapter, + int first_index, int count); +int ena_setup_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); +void ena_free_all_io_tx_resources_in_range(struct ena_adapter *adapter, + int first_index, int count); +void ena_free_all_io_tx_resources(struct ena_adapter *adapter); +void ena_down(struct ena_adapter *adapter); +int ena_up(struct ena_adapter *adapter); +void ena_unmask_interrupt(struct ena_ring *tx_ring, struct ena_ring *rx_ring); +void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring); #endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.c b/drivers/net/ethernet/amazon/ena/ena_xdp.c new file mode 100644 index 000000000000..d0c8a2dc9a67 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_xdp.c @@ -0,0 +1,465 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* + * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#include "ena_xdp.h" + +static int validate_xdp_req_id(struct ena_ring *xdp_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + if (likely(tx_info->xdpf)) + return 0; + + return handle_invalid_req_id(xdp_ring, req_id, tx_info, true); +} + +static int ena_xdp_tx_map_frame(struct ena_ring *xdp_ring, + struct ena_tx_buffer *tx_info, + struct xdp_frame *xdpf, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_adapter *adapter = xdp_ring->adapter; + struct ena_com_buf *ena_buf; + int push_len = 0; + dma_addr_t dma; + void *data; + u32 size; + + tx_info->xdpf = xdpf; + data = tx_info->xdpf->data; + size = tx_info->xdpf->len; + + if (xdp_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* Designate part of the packet for LLQ */ + push_len = min_t(u32, size, xdp_ring->tx_max_header_size); + + ena_tx_ctx->push_header = data; + + size -= push_len; + data += push_len; + } + + ena_tx_ctx->header_len = push_len; + + if (size > 0) { + dma = dma_map_single(xdp_ring->dev, + data, + size, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(xdp_ring->dev, dma))) + goto error_report_dma_error; + + tx_info->map_linear_data = 0; + + ena_buf = tx_info->bufs; + ena_buf->paddr = dma; + ena_buf->len = size; + + ena_tx_ctx->ena_bufs = ena_buf; + ena_tx_ctx->num_bufs = tx_info->num_of_bufs = 1; + } + + return 0; + +error_report_dma_error: + ena_increase_stat(&xdp_ring->tx_stats.dma_mapping_err, 1, + &xdp_ring->syncp); + netif_warn(adapter, tx_queued, adapter->netdev, "Failed to map xdp buff\n"); + + return -EINVAL; +} + +int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, + struct net_device *dev, + struct xdp_frame *xdpf, + int flags) +{ + struct ena_com_tx_ctx ena_tx_ctx = {}; + struct ena_tx_buffer *tx_info; + u16 next_to_use, req_id; + int rc; + + next_to_use = xdp_ring->next_to_use; + req_id = xdp_ring->free_ids[next_to_use]; + tx_info = &xdp_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + rc = ena_xdp_tx_map_frame(xdp_ring, tx_info, xdpf, &ena_tx_ctx); + if (unlikely(rc)) + return rc; + + ena_tx_ctx.req_id = req_id; + + rc = ena_xmit_common(dev, + xdp_ring, + tx_info, + &ena_tx_ctx, + next_to_use, + xdpf->len); + if (rc) + goto error_unmap_dma; + + /* trigger the dma engine. ena_ring_tx_doorbell() + * calls a memory barrier inside it. + */ + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring); + + return rc; + +error_unmap_dma: + ena_unmap_tx_buff(xdp_ring, tx_info); + tx_info->xdpf = NULL; + return rc; +} + +int ena_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_ring *xdp_ring; + int qid, i, nxmit = 0; + + if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) + return -EINVAL; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return -ENETDOWN; + + /* We assume that all rings have the same XDP program */ + if (!READ_ONCE(adapter->rx_ring->xdp_bpf_prog)) + return -ENXIO; + + qid = smp_processor_id() % adapter->xdp_num_queues; + qid += adapter->xdp_first_ring; + xdp_ring = &adapter->tx_ring[qid]; + + /* Other CPU ids might try to send thorugh this queue */ + spin_lock(&xdp_ring->xdp_tx_lock); + + for (i = 0; i < n; i++) { + if (ena_xdp_xmit_frame(xdp_ring, dev, frames[i], 0)) + break; + nxmit++; + } + + /* Ring doorbell to make device aware of the packets */ + if (flags & XDP_XMIT_FLUSH) + ena_ring_tx_doorbell(xdp_ring); + + spin_unlock(&xdp_ring->xdp_tx_lock); + + /* Return number of packets sent */ + return nxmit; +} + +static void ena_init_all_xdp_queues(struct ena_adapter *adapter) +{ + adapter->xdp_first_ring = adapter->num_io_queues; + adapter->xdp_num_queues = adapter->num_io_queues; + + ena_init_io_rings(adapter, + adapter->xdp_first_ring, + adapter->xdp_num_queues); +} + +int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter) +{ + u32 xdp_first_ring = adapter->xdp_first_ring; + u32 xdp_num_queues = adapter->xdp_num_queues; + int rc = 0; + + rc = ena_setup_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto setup_err; + + rc = ena_create_io_tx_queues_in_range(adapter, xdp_first_ring, xdp_num_queues); + if (rc) + goto create_err; + + return 0; + +create_err: + ena_free_all_io_tx_resources_in_range(adapter, xdp_first_ring, xdp_num_queues); +setup_err: + return rc; +} + +/* Provides a way for both kernel and bpf-prog to know + * more about the RX-queue a given XDP frame arrived on. + */ +static int ena_xdp_register_rxq_info(struct ena_ring *rx_ring) +{ + int rc; + + rc = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, rx_ring->qid, 0); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + goto err; + } + + rc = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); + + if (rc) { + netif_err(rx_ring->adapter, ifup, rx_ring->netdev, + "Failed to register xdp rx queue info memory model. RX queue num %d rc: %d\n", + rx_ring->qid, rc); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + } + +err: + return rc; +} + +static void ena_xdp_unregister_rxq_info(struct ena_ring *rx_ring) +{ + xdp_rxq_info_unreg_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); +} + +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count) +{ + struct bpf_prog *old_bpf_prog; + struct ena_ring *rx_ring; + int i = 0; + + for (i = first; i < count; i++) { + rx_ring = &adapter->rx_ring[i]; + old_bpf_prog = xchg(&rx_ring->xdp_bpf_prog, prog); + + if (!old_bpf_prog && prog) { + ena_xdp_register_rxq_info(rx_ring); + rx_ring->rx_headroom = XDP_PACKET_HEADROOM; + } else if (old_bpf_prog && !prog) { + ena_xdp_unregister_rxq_info(rx_ring); + rx_ring->rx_headroom = NET_SKB_PAD; + } + } +} + +static void ena_xdp_exchange_program(struct ena_adapter *adapter, + struct bpf_prog *prog) +{ + struct bpf_prog *old_bpf_prog = xchg(&adapter->xdp_bpf_prog, prog); + + ena_xdp_exchange_program_rx_in_range(adapter, + prog, + 0, + adapter->num_io_queues); + + if (old_bpf_prog) + bpf_prog_put(old_bpf_prog); +} + +static int ena_destroy_and_free_all_xdp_queues(struct ena_adapter *adapter) +{ + bool was_up; + int rc; + + was_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + if (was_up) + ena_down(adapter); + + adapter->xdp_first_ring = 0; + adapter->xdp_num_queues = 0; + ena_xdp_exchange_program(adapter, NULL); + if (was_up) { + rc = ena_up(adapter); + if (rc) + return rc; + } + return 0; +} + +static int ena_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct bpf_prog *prog = bpf->prog; + struct bpf_prog *old_bpf_prog; + int rc, prev_mtu; + bool is_up; + + is_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + rc = ena_xdp_allowed(adapter); + if (rc == ENA_XDP_ALLOWED) { + old_bpf_prog = adapter->xdp_bpf_prog; + if (prog) { + if (!is_up) { + ena_init_all_xdp_queues(adapter); + } else if (!old_bpf_prog) { + ena_down(adapter); + ena_init_all_xdp_queues(adapter); + } + ena_xdp_exchange_program(adapter, prog); + + if (is_up && !old_bpf_prog) { + rc = ena_up(adapter); + if (rc) + return rc; + } + xdp_features_set_redirect_target(netdev, false); + } else if (old_bpf_prog) { + xdp_features_clear_redirect_target(netdev); + rc = ena_destroy_and_free_all_xdp_queues(adapter); + if (rc) + return rc; + } + + prev_mtu = netdev->max_mtu; + netdev->max_mtu = prog ? ENA_XDP_MAX_MTU : adapter->max_mtu; + + if (!old_bpf_prog) + netif_info(adapter, drv, adapter->netdev, + "XDP program is set, changing the max_mtu from %d to %d", + prev_mtu, netdev->max_mtu); + + } else if (rc == ENA_XDP_CURRENT_MTU_TOO_LARGE) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the current MTU (%d) is larger than the maximum allowed MTU (%lu) while xdp is on", + netdev->mtu, ENA_XDP_MAX_MTU); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, the current MTU is larger than the maximum allowed MTU. Check the dmesg for more info"); + return -EINVAL; + } else if (rc == ENA_XDP_NO_ENOUGH_QUEUES) { + netif_err(adapter, drv, adapter->netdev, + "Failed to set xdp program, the Rx/Tx channel count should be at most half of the maximum allowed channel count. The current queue count (%d), the maximal queue count (%d)\n", + adapter->num_io_queues, adapter->max_num_io_queues); + NL_SET_ERR_MSG_MOD(bpf->extack, + "Failed to set xdp program, there is no enough space for allocating XDP queues, Check the dmesg for more info"); + return -EINVAL; + } + + return 0; +} + +/* This is the main xdp callback, it's used by the kernel to set/unset the xdp + * program as well as to query the current xdp program id. + */ +int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf) +{ + switch (bpf->command) { + case XDP_SETUP_PROG: + return ena_xdp_set(netdev, bpf); + default: + return -EINVAL; + } + return 0; +} + +static int ena_clean_xdp_irq(struct ena_ring *xdp_ring, u32 budget) +{ + u32 total_done = 0; + u16 next_to_clean; + int tx_pkts = 0; + u16 req_id; + int rc; + + if (unlikely(!xdp_ring)) + return 0; + next_to_clean = xdp_ring->next_to_clean; + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct xdp_frame *xdpf; + + rc = ena_com_tx_comp_req_id_get(xdp_ring->ena_com_io_cq, + &req_id); + if (rc) { + if (unlikely(rc == -EINVAL)) + handle_invalid_req_id(xdp_ring, req_id, NULL, + true); + break; + } + + /* validate that the request id points to a valid xdp_frame */ + rc = validate_xdp_req_id(xdp_ring, req_id); + if (rc) + break; + + tx_info = &xdp_ring->tx_buffer_info[req_id]; + xdpf = tx_info->xdpf; + + tx_info->xdpf = NULL; + tx_info->last_jiffies = 0; + ena_unmap_tx_buff(xdp_ring, tx_info); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d skb %p completed\n", xdp_ring->qid, + xdpf); + + tx_pkts++; + total_done += tx_info->tx_descs; + + xdp_return_frame(xdpf); + xdp_ring->free_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + xdp_ring->ring_size); + } + + xdp_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(xdp_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(xdp_ring->ena_com_io_cq); + + netif_dbg(xdp_ring->adapter, tx_done, xdp_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + xdp_ring->qid, tx_pkts); + + return tx_pkts; +} + +/* This is the XDP napi callback. XDP queues use a separate napi callback + * than Rx/Tx queues. + */ +int ena_xdp_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + u32 xdp_work_done, xdp_budget; + struct ena_ring *xdp_ring; + int napi_comp_call = 0; + int ret; + + xdp_ring = ena_napi->xdp_ring; + + xdp_budget = budget; + + if (!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags) || + test_bit(ENA_FLAG_TRIGGER_RESET, &xdp_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + xdp_work_done = ena_clean_xdp_irq(xdp_ring, xdp_budget); + + /* If the device is about to reset or down, avoid unmask + * the interrupt and return 0 so NAPI won't reschedule + */ + if (unlikely(!test_bit(ENA_FLAG_DEV_UP, &xdp_ring->adapter->flags))) { + napi_complete_done(napi, 0); + ret = 0; + } else if (xdp_budget > xdp_work_done) { + napi_comp_call = 1; + if (napi_complete_done(napi, xdp_work_done)) + ena_unmask_interrupt(xdp_ring, NULL); + ena_update_ring_numa_node(xdp_ring, NULL); + ret = xdp_work_done; + } else { + ret = xdp_budget; + } + + u64_stats_update_begin(&xdp_ring->syncp); + xdp_ring->tx_stats.napi_comp += napi_comp_call; + xdp_ring->tx_stats.tx_poll++; + u64_stats_update_end(&xdp_ring->syncp); + xdp_ring->tx_stats.last_napi_jiffies = jiffies; + + return ret; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_xdp.h b/drivers/net/ethernet/amazon/ena/ena_xdp.h new file mode 100644 index 000000000000..80c749608108 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_xdp.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* + * Copyright 2015-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + */ + +#ifndef ENA_XDP_H +#define ENA_XDP_H + +#include "ena_netdev.h" +#include + +/* The max MTU size is configured to be the ethernet frame size without + * the overhead of the ethernet header, which can have a VLAN header, and + * a frame check sequence (FCS). + * The buffer size we share with the device is defined to be ENA_PAGE_SIZE + */ +#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \ + VLAN_HLEN - XDP_PACKET_HEADROOM - \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + +#define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \ + ((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues)) + +enum ENA_XDP_ACTIONS { + ENA_XDP_PASS = 0, + ENA_XDP_TX = BIT(0), + ENA_XDP_REDIRECT = BIT(1), + ENA_XDP_DROP = BIT(2) +}; + +#define ENA_XDP_FORWARDED (ENA_XDP_TX | ENA_XDP_REDIRECT) + +int ena_setup_and_create_all_xdp_queues(struct ena_adapter *adapter); +void ena_xdp_exchange_program_rx_in_range(struct ena_adapter *adapter, + struct bpf_prog *prog, + int first, int count); +int ena_xdp_io_poll(struct napi_struct *napi, int budget); +int ena_xdp_xmit_frame(struct ena_ring *xdp_ring, + struct net_device *dev, + struct xdp_frame *xdpf, + int flags); +int ena_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags); +int ena_xdp(struct net_device *netdev, struct netdev_bpf *bpf); + +enum ena_xdp_errors_t { + ENA_XDP_ALLOWED = 0, + ENA_XDP_CURRENT_MTU_TOO_LARGE, + ENA_XDP_NO_ENOUGH_QUEUES, +}; + +static inline bool ena_xdp_present(struct ena_adapter *adapter) +{ + return !!adapter->xdp_bpf_prog; +} + +static inline bool ena_xdp_present_ring(struct ena_ring *ring) +{ + return !!ring->xdp_bpf_prog; +} + +static inline bool ena_xdp_legal_queue_count(struct ena_adapter *adapter, + u32 queues) +{ + return 2 * queues <= adapter->max_num_io_queues; +} + +static inline enum ena_xdp_errors_t ena_xdp_allowed(struct ena_adapter *adapter) +{ + enum ena_xdp_errors_t rc = ENA_XDP_ALLOWED; + + if (adapter->netdev->mtu > ENA_XDP_MAX_MTU) + rc = ENA_XDP_CURRENT_MTU_TOO_LARGE; + else if (!ena_xdp_legal_queue_count(adapter, adapter->num_io_queues)) + rc = ENA_XDP_NO_ENOUGH_QUEUES; + + return rc; +} + +static inline int ena_xdp_execute(struct ena_ring *rx_ring, struct xdp_buff *xdp) +{ + u32 verdict = ENA_XDP_PASS; + struct bpf_prog *xdp_prog; + struct ena_ring *xdp_ring; + struct xdp_frame *xdpf; + u64 *xdp_stat; + + xdp_prog = READ_ONCE(rx_ring->xdp_bpf_prog); + + if (!xdp_prog) + return verdict; + + verdict = bpf_prog_run_xdp(xdp_prog, xdp); + + switch (verdict) { + case XDP_TX: + xdpf = xdp_convert_buff_to_frame(xdp); + if (unlikely(!xdpf)) { + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + } + + /* Find xmit queue */ + xdp_ring = rx_ring->xdp_ring; + + /* The XDP queues are shared between XDP_TX and XDP_REDIRECT */ + spin_lock(&xdp_ring->xdp_tx_lock); + + if (ena_xdp_xmit_frame(xdp_ring, rx_ring->netdev, xdpf, + XDP_XMIT_FLUSH)) + xdp_return_frame(xdpf); + + spin_unlock(&xdp_ring->xdp_tx_lock); + xdp_stat = &rx_ring->rx_stats.xdp_tx; + verdict = ENA_XDP_TX; + break; + case XDP_REDIRECT: + if (likely(!xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog))) { + xdp_stat = &rx_ring->rx_stats.xdp_redirect; + verdict = ENA_XDP_REDIRECT; + break; + } + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + case XDP_ABORTED: + trace_xdp_exception(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_aborted; + verdict = ENA_XDP_DROP; + break; + case XDP_DROP: + xdp_stat = &rx_ring->rx_stats.xdp_drop; + verdict = ENA_XDP_DROP; + break; + case XDP_PASS: + xdp_stat = &rx_ring->rx_stats.xdp_pass; + verdict = ENA_XDP_PASS; + break; + default: + bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, verdict); + xdp_stat = &rx_ring->rx_stats.xdp_invalid; + verdict = ENA_XDP_DROP; + } + + ena_increase_stat(xdp_stat, 1, &rx_ring->syncp); + + return verdict; +} +#endif /* ENA_XDP_H */ -- cgit v1.3.1 From d3d344a1ca69d8fb2413e29e6400f3ad58a05c06 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 2 Jan 2024 16:22:20 +0000 Subject: net-device: move xdp_prog to net_device_read_rx xdp_prog is used in receive path, both from XDP enabled drivers and from netif_elide_gro(). This patch also removes two 4-bytes holes. Fixes: 43a71cd66b9c ("net-device: reorganize net_device fast path variables") Signed-off-by: Eric Dumazet Cc: Coco Li Cc: Simon Horman Link: https://lore.kernel.org/r/20240102162220.750823-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- Documentation/networking/net_cachelines/net_device.rst | 2 +- include/linux/netdevice.h | 2 +- net/core/dev.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/net_cachelines/net_device.rst b/Documentation/networking/net_cachelines/net_device.rst index 2dd8d8f20da2..e75a53593bb9 100644 --- a/Documentation/networking/net_cachelines/net_device.rst +++ b/Documentation/networking/net_cachelines/net_device.rst @@ -96,7 +96,7 @@ unsigned_char* dev_addr struct_netdev_queue* _rx read_mostly - netdev_get_rx_queue(rx) unsigned_int num_rx_queues unsigned_int real_num_rx_queues - read_mostly get_rps_cpu -struct_bpf_prog* xdp_prog +struct_bpf_prog* xdp_prog - read_mostly netif_elide_gro() unsigned_long gro_flush_timeout - read_mostly napi_complete_done int napi_defer_hard_irqs - read_mostly napi_complete_done unsigned_int gro_max_size - read_mostly skb_gro_receive diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d59db9adcc96..e265aa1f2169 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2150,6 +2150,7 @@ struct net_device { /* RX read-mostly hotpath */ __cacheline_group_begin(net_device_read_rx); + struct bpf_prog __rcu *xdp_prog; struct list_head ptype_specific; int ifindex; unsigned int real_num_rx_queues; @@ -2325,7 +2326,6 @@ struct net_device { const unsigned char *dev_addr; unsigned int num_rx_queues; - struct bpf_prog __rcu *xdp_prog; #define GRO_LEGACY_MAX_SIZE 65536u /* TCP minimal MSS is 8 (TCP_MIN_GSO_SIZE), * and shinfo->gso_segs is a 16bit field. diff --git a/net/core/dev.c b/net/core/dev.c index 31588a50b757..bc4ac49d4643 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -11670,7 +11670,7 @@ static void __init net_dev_struct_check(void) #ifdef CONFIG_NET_XGRESS CACHELINE_ASSERT_GROUP_MEMBER(struct net_device, net_device_read_rx, tcx_ingress); #endif - CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 96); + CACHELINE_ASSERT_GROUP_SIZE(struct net_device, net_device_read_rx, 104); } /* -- cgit v1.3.1 From 63c7234f50e8e760fb6b0abdc2bfb6ce83d56cc9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 4 Jan 2024 13:00:44 -0800 Subject: Revert "octeon_ep_vf: add octeon_ep_vf driver" This reverts commit c902ba322cfda8ebe54ffd53392ef7e2ef5d1c65. This reverts commit 50648968b3e3c193b45eaca07840111c9d4fdb74. This reverts commit 77cef1e02104529f54c5b8b4126317eda3ff132d. This reverts commit 8f8d322bc47c1c5ecab1f2238b644e30f69cc475. This reverts commit 6ca7b5486ebd5e7985f0c98a2ac7ae49078043a4. This reverts commit db468f92c3b9437dfeb1dcf55d9b7d1b97769a6c. This reverts commit 5f8c64c2344c888a03fa4b7fd8c3b5e0c235d879. This reverts commit ebdc193b2ce209bfc1ebec2f777cd7bac00b547c. The driver needs more work. Signed-off-by: Jakub Kicinski --- .../networking/device_drivers/ethernet/index.rst | 1 - .../ethernet/marvell/octeon_ep_vf.rst | 24 - MAINTAINERS | 9 - drivers/net/ethernet/marvell/Kconfig | 1 - drivers/net/ethernet/marvell/Makefile | 1 - drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig | 19 - drivers/net/ethernet/marvell/octeon_ep_vf/Makefile | 10 - .../ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c | 488 -------- .../ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c | 500 -------- .../marvell/octeon_ep_vf/octep_vf_config.h | 160 --- .../marvell/octeon_ep_vf/octep_vf_ethtool.c | 307 ----- .../ethernet/marvell/octeon_ep_vf/octep_vf_main.c | 1230 -------------------- .../ethernet/marvell/octeon_ep_vf/octep_vf_main.h | 338 ------ .../ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c | 430 ------- .../ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h | 166 --- .../marvell/octeon_ep_vf/octep_vf_regs_cn9k.h | 154 --- .../marvell/octeon_ep_vf/octep_vf_regs_cnxk.h | 162 --- .../ethernet/marvell/octeon_ep_vf/octep_vf_rx.c | 511 -------- .../ethernet/marvell/octeon_ep_vf/octep_vf_rx.h | 224 ---- .../ethernet/marvell/octeon_ep_vf/octep_vf_tx.c | 331 ------ .../ethernet/marvell/octeon_ep_vf/octep_vf_tx.h | 276 ----- 21 files changed, 5342 deletions(-) delete mode 100644 Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/Makefile delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c delete mode 100644 drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/index.rst b/Documentation/networking/device_drivers/ethernet/index.rst index 6932d8c043c2..43de285b8a92 100644 --- a/Documentation/networking/device_drivers/ethernet/index.rst +++ b/Documentation/networking/device_drivers/ethernet/index.rst @@ -42,7 +42,6 @@ Contents: intel/ice marvell/octeontx2 marvell/octeon_ep - marvell/octeon_ep_vf mellanox/mlx5/index microsoft/netvsc neterion/s2io diff --git a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst b/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst deleted file mode 100644 index 603133d0b92f..000000000000 --- a/Documentation/networking/device_drivers/ethernet/marvell/octeon_ep_vf.rst +++ /dev/null @@ -1,24 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0+ - -======================================================================= -Linux kernel networking driver for Marvell's Octeon PCI Endpoint NIC VF -======================================================================= - -Network driver for Marvell's Octeon PCI EndPoint NIC VF. -Copyright (c) 2020 Marvell International Ltd. - -Overview -======== -This driver implements networking functionality of Marvell's Octeon PCI -EndPoint NIC VF. - -Supported Devices -================= -Currently, this driver support following devices: - * Network controller: Cavium, Inc. Device b203 - * Network controller: Cavium, Inc. Device b403 - * Network controller: Cavium, Inc. Device b103 - * Network controller: Cavium, Inc. Device b903 - * Network controller: Cavium, Inc. Device ba03 - * Network controller: Cavium, Inc. Device bc03 - * Network controller: Cavium, Inc. Device bd03 diff --git a/MAINTAINERS b/MAINTAINERS index e5670722d3d0..79ac49b113dc 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12861,15 +12861,6 @@ L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/marvell/octeon_ep -MARVELL OCTEON ENDPOINT VF DRIVER -M: Veerasenareddy Burru -M: Sathesh Edara -M: Shinas Rasheed -M: Satananda Burla -L: netdev@vger.kernel.org -S: Supported -F: drivers/net/ethernet/marvell/octeon_ep_vf - MARVELL OCTEONTX2 PHYSICAL FUNCTION DRIVER M: Sunil Goutham M: Geetha sowjanya diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig index 837295fecd17..884d64114bff 100644 --- a/drivers/net/ethernet/marvell/Kconfig +++ b/drivers/net/ethernet/marvell/Kconfig @@ -180,7 +180,6 @@ config SKY2_DEBUG source "drivers/net/ethernet/marvell/octeontx2/Kconfig" source "drivers/net/ethernet/marvell/octeon_ep/Kconfig" -source "drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig" source "drivers/net/ethernet/marvell/prestera/Kconfig" endif # NET_VENDOR_MARVELL diff --git a/drivers/net/ethernet/marvell/Makefile b/drivers/net/ethernet/marvell/Makefile index a399defe25fd..ceba4aa4f026 100644 --- a/drivers/net/ethernet/marvell/Makefile +++ b/drivers/net/ethernet/marvell/Makefile @@ -12,6 +12,5 @@ obj-$(CONFIG_PXA168_ETH) += pxa168_eth.o obj-$(CONFIG_SKGE) += skge.o obj-$(CONFIG_SKY2) += sky2.o obj-y += octeon_ep/ -obj-y += octeon_ep_vf/ obj-y += octeontx2/ obj-y += prestera/ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig b/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig deleted file mode 100644 index dbd1267bda0c..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -# -# Marvell's Octeon PCI Endpoint NIC VF Driver Configuration -# - -config OCTEON_EP_VF - tristate "Marvell Octeon PCI Endpoint NIC VF Driver" - depends on 64BIT - depends on PCI - help - This driver supports networking functionality of Marvell's - Octeon PCI Endpoint NIC VF. - - To know the list of devices supported by this driver, refer - documentation in - . - - To compile this drivers as a module, choose M here. Name of the - module is octeon_ep_vf. diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile b/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile deleted file mode 100644 index 4a5f9fcb0b40..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Network driver for Marvell's Octeon PCI Endpoint NIC VF -# - -obj-$(CONFIG_OCTEON_EP_VF) += octeon_ep_vf.o - -octeon_ep_vf-y := octep_vf_main.o octep_vf_cn9k.o octep_vf_cnxk.o \ - octep_vf_tx.o octep_vf_rx.o octep_vf_mbox.o \ - octep_vf_ethtool.o diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c deleted file mode 100644 index 292abc897a83..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cn9k.c +++ /dev/null @@ -1,488 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" -#include "octep_vf_regs_cn9k.h" - -/* Dump useful hardware IQ/OQ CSRs for debug purpose */ -static void cn93_vf_dump_q_regs(struct octep_vf_device *oct, int qno) -{ - struct device *dev = &oct->pdev->dev; - - dev_info(dev, "IQ-%d register dump\n", qno); - dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_INSTR_DBELL(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(qno))); - dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_CONTROL(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(qno))); - dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_ENABLE(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(qno))); - dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_INSTR_BADDR(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(qno))); - dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_INSTR_RSIZE(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(qno))); - dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_CNTS(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CNTS(qno))); - dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_INT_LEVELS(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(qno))); - dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_PKT_CNT(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_PKT_CNT(qno))); - dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_IN_BYTE_CNT(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_BYTE_CNT(qno))); - - dev_info(dev, "OQ-%d register dump\n", qno); - dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_SLIST_DBELL(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(qno))); - dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_CONTROL(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(qno))); - dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_ENABLE(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(qno))); - dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_SLIST_BADDR(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_BADDR(qno))); - dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_SLIST_RSIZE(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_RSIZE(qno))); - dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_CNTS(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CNTS(qno))); - dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_INT_LEVELS(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(qno))); - dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_PKT_CNT(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_PKT_CNT(qno))); - dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", - qno, CN93_VF_SDP_R_OUT_BYTE_CNT(qno), - octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_BYTE_CNT(qno))); -} - -/* Reset Hardware Tx queue */ -static int cn93_vf_reset_iq(struct octep_vf_device *oct, int q_no) -{ - u64 val = 0ULL; - - dev_dbg(&oct->pdev->dev, "Reset VF IQ-%d\n", q_no); - - /* Disable the Tx/Instruction Ring */ - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(q_no), val); - - /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q_no), val); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_PKT_CNT(q_no), val); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_BYTE_CNT(q_no), val); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(q_no), val); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(q_no), val); - - val = 0xFFFFFFFF; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(q_no), val); - - val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CNTS(q_no)); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_CNTS(q_no), val & 0xFFFFFFFF); - - return 0; -} - -/* Reset Hardware Rx queue */ -static void cn93_vf_reset_oq(struct octep_vf_device *oct, int q_no) -{ - u64 val = 0ULL; - - /* Disable Output (Rx) Ring */ - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(q_no), val); - - /* Clear count CSRs */ - val = octep_vf_read_csr(oct, CN93_VF_SDP_R_OUT_CNTS(q_no)); - octep_vf_write_csr(oct, CN93_VF_SDP_R_OUT_CNTS(q_no), val); - - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); -} - -/* Reset all hardware Tx/Rx queues */ -static void octep_vf_reset_io_queues_cn93(struct octep_vf_device *oct) -{ - struct pci_dev *pdev = oct->pdev; - int q; - - dev_dbg(&pdev->dev, "Reset OCTEP_CN93 VF IO Queues\n"); - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - cn93_vf_reset_iq(oct, q); - cn93_vf_reset_oq(oct, q); - } -} - -/* Initialize configuration limits and initial active config */ -static void octep_vf_init_config_cn93_vf(struct octep_vf_device *oct) -{ - struct octep_vf_config *conf = oct->conf; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(0)); - conf->ring_cfg.max_io_rings = (reg_val >> CN93_VF_R_IN_CTL_RPVF_POS) & - CN93_VF_R_IN_CTL_RPVF_MASK; - conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings; - - conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS; - conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR; - conf->iq.db_min = OCTEP_VF_DB_MIN; - conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD; - - conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS; - conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE; - conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD; - conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD; - conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD; - - conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings; -} - -/* Setup registers for a hardware Tx Queue */ -static void octep_vf_setup_iq_regs_cn93(struct octep_vf_device *oct, int iq_no) -{ - struct octep_vf_iq *iq = oct->iq[iq_no]; - u32 reset_instr_cnt; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no)); - - /* wait for IDLE to set to 1 */ - if (!(reg_val & CN93_VF_R_IN_CTL_IDLE)) { - do { - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no)); - } while (!(reg_val & CN93_VF_R_IN_CTL_IDLE)); - } - reg_val |= CN93_VF_R_IN_CTL_RDSIZE; - reg_val |= CN93_VF_R_IN_CTL_IS_64B; - reg_val |= CN93_VF_R_IN_CTL_ESR; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_CONTROL(iq_no), reg_val); - - /* Write the start of the input queue's ring and its size */ - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_BADDR(iq_no), iq->desc_ring_dma); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_RSIZE(iq_no), iq->max_count); - - /* Remember the doorbell & instruction count register addr for this queue */ - iq->doorbell_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no); - iq->inst_cnt_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_CNTS(iq_no); - iq->intr_lvl_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_IN_INT_LEVELS(iq_no); - - /* Store the current instruction counter (used in flush_iq calculation) */ - reset_instr_cnt = readl(iq->inst_cnt_reg); - writel(reset_instr_cnt, iq->inst_cnt_reg); - - /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ - reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val); -} - -/* Setup registers for a hardware Rx Queue */ -static void octep_vf_setup_oq_regs_cn93(struct octep_vf_device *oct, int oq_no) -{ - struct octep_vf_oq *oq = oct->oq[oq_no]; - u32 time_threshold = 0; - u64 oq_ctl = 0ULL; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no)); - - /* wait for IDLE to set to 1 */ - if (!(reg_val & CN93_VF_R_OUT_CTL_IDLE)) { - do { - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no)); - } while (!(reg_val & CN93_VF_R_OUT_CTL_IDLE)); - } - - reg_val &= ~(CN93_VF_R_OUT_CTL_IMODE); - reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_P); - reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_P); - reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_I); - reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_I); - reg_val &= ~(CN93_VF_R_OUT_CTL_ES_I); - reg_val &= ~(CN93_VF_R_OUT_CTL_ROR_D); - reg_val &= ~(CN93_VF_R_OUT_CTL_NSR_D); - reg_val &= ~(CN93_VF_R_OUT_CTL_ES_D); - reg_val |= (CN93_VF_R_OUT_CTL_ES_P); - - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no), reg_val); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count); - - oq_ctl = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no)); - oq_ctl &= ~0x7fffffULL; //clear the ISIZE and BSIZE (22-0) - oq_ctl |= (oq->buffer_size & 0xffff); //populate the BSIZE (15-0) - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_CONTROL(oq_no), oq_ctl); - - /* Get the mapped address of the pkt_sent and pkts_credit regs */ - oq->pkts_sent_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_OUT_CNTS(oq_no); - oq->pkts_credit_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_OUT_SLIST_DBELL(oq_no); - - time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); - reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); -} - -/* Setup registers for a VF mailbox */ -static void octep_vf_setup_mbox_regs_cn93(struct octep_vf_device *oct, int q_no) -{ - struct octep_vf_mbox *mbox = oct->mbox; - - /* PF to VF DATA reg. VF reads from this reg */ - mbox->mbox_read_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_PF_VF_DATA(q_no); - - /* VF mbox interrupt reg */ - mbox->mbox_int_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_PF_VF_INT(q_no); - - /* VF to PF DATA reg. VF writes into this reg */ - mbox->mbox_write_reg = oct->mmio.hw_addr + CN93_VF_SDP_R_MBOX_VF_PF_DATA(q_no); -} - -/* Mailbox Interrupt handler */ -static void cn93_handle_vf_mbox_intr(struct octep_vf_device *oct) -{ - if (oct->mbox) - schedule_work(&oct->mbox->wk.work); - else - dev_err(&oct->pdev->dev, "cannot schedule work on invalid mbox\n"); -} - -/* Tx/Rx queue interrupt handler */ -static irqreturn_t octep_vf_ioq_intr_handler_cn93(void *data) -{ - struct octep_vf_ioq_vector *vector = (struct octep_vf_ioq_vector *)data; - struct octep_vf_oq *oq = vector->oq; - struct octep_vf_device *oct = vector->octep_vf_dev; - u64 reg_val = 0ULL; - - /* Mailbox interrupt arrives along with interrupt of tx/rx ring pair 0 */ - if (oq->q_no == 0) { - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0)); - if (reg_val & CN93_VF_SDP_R_MBOX_PF_VF_INT_STATUS) { - cn93_handle_vf_mbox_intr(oct); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0), reg_val); - } - } - napi_schedule_irqoff(oq->napi); - return IRQ_HANDLED; -} - -/* Re-initialize Octeon hardware registers */ -static void octep_vf_reinit_regs_cn93(struct octep_vf_device *oct) -{ - u32 i; - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - oct->hw_ops.setup_iq_regs(oct, i); - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - oct->hw_ops.setup_oq_regs(oct, i); - - oct->hw_ops.enable_interrupts(oct); - oct->hw_ops.enable_io_queues(oct); - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); -} - -/* Enable all interrupts */ -static void octep_vf_enable_interrupts_cn93(struct octep_vf_device *oct) -{ - int num_rings, q; - u64 reg_val; - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) { - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q), reg_val); - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q), reg_val); - } - /* Enable PF to VF mbox interrupt by setting 2nd bit*/ - octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0), - CN93_VF_SDP_R_MBOX_PF_VF_INT_ENAB); -} - -/* Disable all interrupts */ -static void octep_vf_disable_interrupts_cn93(struct octep_vf_device *oct) -{ - int num_rings, q; - u64 reg_val; - - /* Disable PF to VF mbox interrupt by setting 2nd bit*/ - if (oct->mbox) - octep_vf_write_csr64(oct, CN93_VF_SDP_R_MBOX_PF_VF_INT(0), 0x0); - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) { - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q)); - reg_val &= ~(0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(q), reg_val); - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q)); - reg_val &= ~(0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(q), reg_val); - } -} - -/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ -static u32 octep_vf_update_iq_read_index_cn93(struct octep_vf_iq *iq) -{ - u32 pkt_in_done = readl(iq->inst_cnt_reg); - u32 last_done, new_idx; - - last_done = pkt_in_done - iq->pkt_in_done; - iq->pkt_in_done = pkt_in_done; - - new_idx = (iq->octep_vf_read_index + last_done) % iq->max_count; - - return new_idx; -} - -/* Enable a hardware Tx Queue */ -static void octep_vf_enable_iq_cn93(struct octep_vf_device *oct, int iq_no) -{ - u64 loop = HZ; - u64 reg_val; - - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); - - while (octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INSTR_DBELL(iq_no)) && - loop--) { - schedule_timeout_interruptible(1); - } - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val); - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no)); - reg_val |= 0x1ULL; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no), reg_val); -} - -/* Enable a hardware Rx Queue */ -static void octep_vf_enable_oq_cn93(struct octep_vf_device *oct, int oq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); - - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no)); - reg_val |= 0x1ULL; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no), reg_val); -} - -/* Enable all hardware Tx/Rx Queues assigned to VF */ -static void octep_vf_enable_io_queues_cn93(struct octep_vf_device *oct) -{ - u8 q; - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - octep_vf_enable_iq_cn93(oct, q); - octep_vf_enable_oq_cn93(oct, q); - } -} - -/* Disable a hardware Tx Queue assigned to VF */ -static void octep_vf_disable_iq_cn93(struct octep_vf_device *oct, int iq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no)); - reg_val &= ~0x1ULL; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_IN_ENABLE(iq_no), reg_val); -} - -/* Disable a hardware Rx Queue assigned to VF */ -static void octep_vf_disable_oq_cn93(struct octep_vf_device *oct, int oq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no)); - reg_val &= ~0x1ULL; - octep_vf_write_csr64(oct, CN93_VF_SDP_R_OUT_ENABLE(oq_no), reg_val); -} - -/* Disable all hardware Tx/Rx Queues assigned to VF */ -static void octep_vf_disable_io_queues_cn93(struct octep_vf_device *oct) -{ - int q = 0; - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - octep_vf_disable_iq_cn93(oct, q); - octep_vf_disable_oq_cn93(oct, q); - } -} - -/* Dump hardware registers (including Tx/Rx queues) for debugging. */ -static void octep_vf_dump_registers_cn93(struct octep_vf_device *oct) -{ - u8 num_rings, q; - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) - cn93_vf_dump_q_regs(oct, q); -} - -/** - * octep_vf_device_setup_cn93() - Setup Octeon device. - * - * @oct: Octeon device private data structure. - * - * - initialize hardware operations. - * - get target side pcie port number for the device. - * - set initial configuration and max limits. - */ -void octep_vf_device_setup_cn93(struct octep_vf_device *oct) -{ - oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cn93; - oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cn93; - oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cn93; - - oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cn93; - oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cn93; - - oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cn93; - oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cn93; - - oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cn93; - - oct->hw_ops.enable_iq = octep_vf_enable_iq_cn93; - oct->hw_ops.enable_oq = octep_vf_enable_oq_cn93; - oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cn93; - - oct->hw_ops.disable_iq = octep_vf_disable_iq_cn93; - oct->hw_ops.disable_oq = octep_vf_disable_oq_cn93; - oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cn93; - oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cn93; - - oct->hw_ops.dump_registers = octep_vf_dump_registers_cn93; - octep_vf_init_config_cn93_vf(oct); -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c deleted file mode 100644 index c0994dc04319..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_cnxk.c +++ /dev/null @@ -1,500 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" -#include "octep_vf_regs_cnxk.h" - -/* Dump useful hardware IQ/OQ CSRs for debug purpose */ -static void cnxk_vf_dump_q_regs(struct octep_vf_device *oct, int qno) -{ - struct device *dev = &oct->pdev->dev; - - dev_info(dev, "IQ-%d register dump\n", qno); - dev_info(dev, "R[%d]_IN_INSTR_DBELL[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_INSTR_DBELL(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(qno))); - dev_info(dev, "R[%d]_IN_CONTROL[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_CONTROL(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(qno))); - dev_info(dev, "R[%d]_IN_ENABLE[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_ENABLE(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(qno))); - dev_info(dev, "R[%d]_IN_INSTR_BADDR[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_INSTR_BADDR(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(qno))); - dev_info(dev, "R[%d]_IN_INSTR_RSIZE[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_INSTR_RSIZE(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(qno))); - dev_info(dev, "R[%d]_IN_CNTS[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_CNTS(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(qno))); - dev_info(dev, "R[%d]_IN_INT_LEVELS[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_INT_LEVELS(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(qno))); - dev_info(dev, "R[%d]_IN_PKT_CNT[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_PKT_CNT(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_PKT_CNT(qno))); - dev_info(dev, "R[%d]_IN_BYTE_CNT[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_IN_BYTE_CNT(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_BYTE_CNT(qno))); - - dev_info(dev, "OQ-%d register dump\n", qno); - dev_info(dev, "R[%d]_OUT_SLIST_DBELL[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_SLIST_DBELL(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(qno))); - dev_info(dev, "R[%d]_OUT_CONTROL[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_CONTROL(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(qno))); - dev_info(dev, "R[%d]_OUT_ENABLE[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_ENABLE(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(qno))); - dev_info(dev, "R[%d]_OUT_SLIST_BADDR[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_SLIST_BADDR(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(qno))); - dev_info(dev, "R[%d]_OUT_SLIST_RSIZE[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(qno))); - dev_info(dev, "R[%d]_OUT_CNTS[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_CNTS(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CNTS(qno))); - dev_info(dev, "R[%d]_OUT_INT_LEVELS[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_INT_LEVELS(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(qno))); - dev_info(dev, "R[%d]_OUT_PKT_CNT[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_PKT_CNT(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_PKT_CNT(qno))); - dev_info(dev, "R[%d]_OUT_BYTE_CNT[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_OUT_BYTE_CNT(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_BYTE_CNT(qno))); - dev_info(dev, "R[%d]_ERR_TYPE[0x%llx]: 0x%016llx\n", - qno, CNXK_VF_SDP_R_ERR_TYPE(qno), - octep_vf_read_csr64(oct, CNXK_VF_SDP_R_ERR_TYPE(qno))); -} - -/* Reset Hardware Tx queue */ -static int cnxk_vf_reset_iq(struct octep_vf_device *oct, int q_no) -{ - u64 val = 0ULL; - - dev_dbg(&oct->pdev->dev, "Reset VF IQ-%d\n", q_no); - - /* Disable the Tx/Instruction Ring */ - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(q_no), val); - - /* clear the Instruction Ring packet/byte counts and doorbell CSRs */ - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q_no), val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_PKT_CNT(q_no), val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_BYTE_CNT(q_no), val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(q_no), val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(q_no), val); - - val = 0xFFFFFFFF; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(q_no), val); - - val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(q_no)); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_CNTS(q_no), val & 0xFFFFFFFF); - - return 0; -} - -/* Reset Hardware Rx queue */ -static void cnxk_vf_reset_oq(struct octep_vf_device *oct, int q_no) -{ - u64 val = 0ULL; - - /* Disable Output (Rx) Ring */ - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(q_no), val); - - /* Clear count CSRs */ - val = octep_vf_read_csr(oct, CNXK_VF_SDP_R_OUT_CNTS(q_no)); - octep_vf_write_csr(oct, CNXK_VF_SDP_R_OUT_CNTS(q_no), val); - - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_PKT_CNT(q_no), 0xFFFFFFFFFULL); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(q_no), 0xFFFFFFFF); -} - -/* Reset all hardware Tx/Rx queues */ -static void octep_vf_reset_io_queues_cnxk(struct octep_vf_device *oct) -{ - struct pci_dev *pdev = oct->pdev; - int q; - - dev_dbg(&pdev->dev, "Reset OCTEP_CNXK VF IO Queues\n"); - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - cnxk_vf_reset_iq(oct, q); - cnxk_vf_reset_oq(oct, q); - } -} - -/* Initialize configuration limits and initial active config */ -static void octep_vf_init_config_cnxk_vf(struct octep_vf_device *oct) -{ - struct octep_vf_config *conf = oct->conf; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(0)); - conf->ring_cfg.max_io_rings = (reg_val >> CNXK_VF_R_IN_CTL_RPVF_POS) & - CNXK_VF_R_IN_CTL_RPVF_MASK; - conf->ring_cfg.active_io_rings = conf->ring_cfg.max_io_rings; - - conf->iq.num_descs = OCTEP_VF_IQ_MAX_DESCRIPTORS; - conf->iq.instr_type = OCTEP_VF_64BYTE_INSTR; - conf->iq.db_min = OCTEP_VF_DB_MIN; - conf->iq.intr_threshold = OCTEP_VF_IQ_INTR_THRESHOLD; - - conf->oq.num_descs = OCTEP_VF_OQ_MAX_DESCRIPTORS; - conf->oq.buf_size = OCTEP_VF_OQ_BUF_SIZE; - conf->oq.refill_threshold = OCTEP_VF_OQ_REFILL_THRESHOLD; - conf->oq.oq_intr_pkt = OCTEP_VF_OQ_INTR_PKT_THRESHOLD; - conf->oq.oq_intr_time = OCTEP_VF_OQ_INTR_TIME_THRESHOLD; - conf->oq.wmark = OCTEP_VF_OQ_WMARK_MIN; - - conf->msix_cfg.ioq_msix = conf->ring_cfg.active_io_rings; -} - -/* Setup registers for a hardware Tx Queue */ -static void octep_vf_setup_iq_regs_cnxk(struct octep_vf_device *oct, int iq_no) -{ - struct octep_vf_iq *iq = oct->iq[iq_no]; - u32 reset_instr_cnt; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no)); - - /* wait for IDLE to set to 1 */ - if (!(reg_val & CNXK_VF_R_IN_CTL_IDLE)) { - do { - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no)); - } while (!(reg_val & CNXK_VF_R_IN_CTL_IDLE)); - } - reg_val |= CNXK_VF_R_IN_CTL_RDSIZE; - reg_val |= CNXK_VF_R_IN_CTL_IS_64B; - reg_val |= CNXK_VF_R_IN_CTL_ESR; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_CONTROL(iq_no), reg_val); - - /* Write the start of the input queue's ring and its size */ - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_BADDR(iq_no), iq->desc_ring_dma); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_RSIZE(iq_no), iq->max_count); - - /* Remember the doorbell & instruction count register addr for this queue */ - iq->doorbell_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no); - iq->inst_cnt_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_CNTS(iq_no); - iq->intr_lvl_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no); - - /* Store the current instruction counter (used in flush_iq calculation) */ - reset_instr_cnt = readl(iq->inst_cnt_reg); - writel(reset_instr_cnt, iq->inst_cnt_reg); - - /* INTR_THRESHOLD is set to max(FFFFFFFF) to disable the INTR */ - reg_val = CFG_GET_IQ_INTR_THRESHOLD(oct->conf) & 0xffffffff; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val); -} - -/* Setup registers for a hardware Rx Queue */ -static void octep_vf_setup_oq_regs_cnxk(struct octep_vf_device *oct, int oq_no) -{ - struct octep_vf_oq *oq = oct->oq[oq_no]; - u32 time_threshold = 0; - u64 oq_ctl = 0ULL; - u64 reg_val; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); - - /* wait for IDLE to set to 1 */ - if (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE)) { - do { - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); - } while (!(reg_val & CNXK_VF_R_OUT_CTL_IDLE)); - } - - reg_val &= ~(CNXK_VF_R_OUT_CTL_IMODE); - reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_P); - reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_P); - reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_I); - reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_I); - reg_val &= ~(CNXK_VF_R_OUT_CTL_ES_I); - reg_val &= ~(CNXK_VF_R_OUT_CTL_ROR_D); - reg_val &= ~(CNXK_VF_R_OUT_CTL_NSR_D); - reg_val &= ~(CNXK_VF_R_OUT_CTL_ES_D); - reg_val |= (CNXK_VF_R_OUT_CTL_ES_P); - - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), reg_val); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_BADDR(oq_no), oq->desc_ring_dma); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_RSIZE(oq_no), oq->max_count); - - oq_ctl = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no)); - /* Clear the ISIZE and BSIZE (22-0) */ - oq_ctl &= ~0x7fffffULL; - /* Populate the BSIZE (15-0) */ - oq_ctl |= (oq->buffer_size & 0xffff); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_CONTROL(oq_no), oq_ctl); - - /* Get the mapped address of the pkt_sent and pkts_credit regs */ - oq->pkts_sent_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_OUT_CNTS(oq_no); - oq->pkts_credit_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_OUT_SLIST_DBELL(oq_no); - - time_threshold = CFG_GET_OQ_INTR_TIME(oct->conf); - reg_val = ((u64)time_threshold << 32) | CFG_GET_OQ_INTR_PKT(oct->conf); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); - - /* set watermark for backpressure */ - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no)); - reg_val &= ~0xFFFFFFFFULL; - reg_val |= CFG_GET_OQ_WMARK(oct->conf); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_WMARK(oq_no), reg_val); -} - -/* Setup registers for a VF mailbox */ -static void octep_vf_setup_mbox_regs_cnxk(struct octep_vf_device *oct, int q_no) -{ - struct octep_vf_mbox *mbox = oct->mbox; - - /* PF to VF DATA reg. VF reads from this reg */ - mbox->mbox_read_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_PF_VF_DATA(q_no); - - /* VF mbox interrupt reg */ - mbox->mbox_int_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_PF_VF_INT(q_no); - - /* VF to PF DATA reg. VF writes into this reg */ - mbox->mbox_write_reg = oct->mmio.hw_addr + CNXK_VF_SDP_R_MBOX_VF_PF_DATA(q_no); -} - -/* Mailbox Interrupt handler */ -static void cnxk_handle_vf_mbox_intr(struct octep_vf_device *oct) -{ - if (oct->mbox) - schedule_work(&oct->mbox->wk.work); - else - dev_err(&oct->pdev->dev, "cannot schedule work on invalid mbox\n"); -} - -/* Tx/Rx queue interrupt handler */ -static irqreturn_t octep_vf_ioq_intr_handler_cnxk(void *data) -{ - struct octep_vf_ioq_vector *vector = (struct octep_vf_ioq_vector *)data; - struct octep_vf_oq *oq = vector->oq; - struct octep_vf_device *oct = vector->octep_vf_dev; - u64 reg_val = 0ULL; - - /* Mailbox interrupt arrives along with interrupt of tx/rx ring pair 0 */ - if (oq->q_no == 0) { - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0)); - if (reg_val & CNXK_VF_SDP_R_MBOX_PF_VF_INT_STATUS) { - cnxk_handle_vf_mbox_intr(oct); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0), reg_val); - } - } - napi_schedule_irqoff(oq->napi); - return IRQ_HANDLED; -} - -/* Re-initialize Octeon hardware registers */ -static void octep_vf_reinit_regs_cnxk(struct octep_vf_device *oct) -{ - u32 i; - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - oct->hw_ops.setup_iq_regs(oct, i); - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - oct->hw_ops.setup_oq_regs(oct, i); - - oct->hw_ops.enable_interrupts(oct); - oct->hw_ops.enable_io_queues(oct); - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) - writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); -} - -/* Enable all interrupts */ -static void octep_vf_enable_interrupts_cnxk(struct octep_vf_device *oct) -{ - int num_rings, q; - u64 reg_val; - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) { - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q), reg_val); - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q), reg_val); - } - /* Enable PF to VF mbox interrupt by setting 2nd bit*/ - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0), - CNXK_VF_SDP_R_MBOX_PF_VF_INT_ENAB); -} - -/* Disable all interrupts */ -static void octep_vf_disable_interrupts_cnxk(struct octep_vf_device *oct) -{ - int num_rings, q; - u64 reg_val; - - /* Disable PF to VF mbox interrupt by setting 2nd bit*/ - if (oct->mbox) - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_MBOX_PF_VF_INT(0), 0x0); - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) { - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q)); - reg_val &= ~(0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(q), reg_val); - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q)); - reg_val &= ~(0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(q), reg_val); - } -} - -/* Get new Octeon Read Index: index of descriptor that Octeon reads next. */ -static u32 octep_vf_update_iq_read_index_cnxk(struct octep_vf_iq *iq) -{ - u32 pkt_in_done = readl(iq->inst_cnt_reg); - u32 last_done, new_idx; - - last_done = pkt_in_done - iq->pkt_in_done; - iq->pkt_in_done = pkt_in_done; - - new_idx = (iq->octep_vf_read_index + last_done) % iq->max_count; - - return new_idx; -} - -/* Enable a hardware Tx Queue */ -static void octep_vf_enable_iq_cnxk(struct octep_vf_device *oct, int iq_no) -{ - u64 loop = HZ; - u64 reg_val; - - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no), 0xFFFFFFFF); - - while (octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INSTR_DBELL(iq_no)) && - loop--) { - schedule_timeout_interruptible(1); - } - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_INT_LEVELS(iq_no), reg_val); - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no)); - reg_val |= 0x1ULL; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no), reg_val); -} - -/* Enable a hardware Rx Queue */ -static void octep_vf_enable_oq_cnxk(struct octep_vf_device *oct, int oq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no)); - reg_val |= (0x1ULL << 62); - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_INT_LEVELS(oq_no), reg_val); - - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_SLIST_DBELL(oq_no), 0xFFFFFFFF); - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no)); - reg_val |= 0x1ULL; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no), reg_val); -} - -/* Enable all hardware Tx/Rx Queues assigned to VF */ -static void octep_vf_enable_io_queues_cnxk(struct octep_vf_device *oct) -{ - u8 q; - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - octep_vf_enable_iq_cnxk(oct, q); - octep_vf_enable_oq_cnxk(oct, q); - } -} - -/* Disable a hardware Tx Queue assigned to VF */ -static void octep_vf_disable_iq_cnxk(struct octep_vf_device *oct, int iq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no)); - reg_val &= ~0x1ULL; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_IN_ENABLE(iq_no), reg_val); -} - -/* Disable a hardware Rx Queue assigned to VF */ -static void octep_vf_disable_oq_cnxk(struct octep_vf_device *oct, int oq_no) -{ - u64 reg_val = 0ULL; - - reg_val = octep_vf_read_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no)); - reg_val &= ~0x1ULL; - octep_vf_write_csr64(oct, CNXK_VF_SDP_R_OUT_ENABLE(oq_no), reg_val); -} - -/* Disable all hardware Tx/Rx Queues assigned to VF */ -static void octep_vf_disable_io_queues_cnxk(struct octep_vf_device *oct) -{ - int q = 0; - - for (q = 0; q < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); q++) { - octep_vf_disable_iq_cnxk(oct, q); - octep_vf_disable_oq_cnxk(oct, q); - } -} - -/* Dump hardware registers (including Tx/Rx queues) for debugging. */ -static void octep_vf_dump_registers_cnxk(struct octep_vf_device *oct) -{ - u8 num_rings, q; - - num_rings = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - for (q = 0; q < num_rings; q++) - cnxk_vf_dump_q_regs(oct, q); -} - -/** - * octep_vf_device_setup_cnxk() - Setup Octeon device. - * - * @oct: Octeon device private data structure. - * - * - initialize hardware operations. - * - get target side pcie port number for the device. - * - set initial configuration and max limits. - */ -void octep_vf_device_setup_cnxk(struct octep_vf_device *oct) -{ - oct->hw_ops.setup_iq_regs = octep_vf_setup_iq_regs_cnxk; - oct->hw_ops.setup_oq_regs = octep_vf_setup_oq_regs_cnxk; - oct->hw_ops.setup_mbox_regs = octep_vf_setup_mbox_regs_cnxk; - - oct->hw_ops.ioq_intr_handler = octep_vf_ioq_intr_handler_cnxk; - oct->hw_ops.reinit_regs = octep_vf_reinit_regs_cnxk; - - oct->hw_ops.enable_interrupts = octep_vf_enable_interrupts_cnxk; - oct->hw_ops.disable_interrupts = octep_vf_disable_interrupts_cnxk; - - oct->hw_ops.update_iq_read_idx = octep_vf_update_iq_read_index_cnxk; - - oct->hw_ops.enable_iq = octep_vf_enable_iq_cnxk; - oct->hw_ops.enable_oq = octep_vf_enable_oq_cnxk; - oct->hw_ops.enable_io_queues = octep_vf_enable_io_queues_cnxk; - - oct->hw_ops.disable_iq = octep_vf_disable_iq_cnxk; - oct->hw_ops.disable_oq = octep_vf_disable_oq_cnxk; - oct->hw_ops.disable_io_queues = octep_vf_disable_io_queues_cnxk; - oct->hw_ops.reset_io_queues = octep_vf_reset_io_queues_cnxk; - - oct->hw_ops.dump_registers = octep_vf_dump_registers_cnxk; - octep_vf_init_config_cnxk_vf(oct); -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h deleted file mode 100644 index e03a647b0110..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_config.h +++ /dev/null @@ -1,160 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#ifndef _OCTEP_VF_CONFIG_H_ -#define _OCTEP_VF_CONFIG_H_ - -/* Tx instruction types by length */ -#define OCTEP_VF_32BYTE_INSTR 32 -#define OCTEP_VF_64BYTE_INSTR 64 - -/* Tx Queue: maximum descriptors per ring */ -#define OCTEP_VF_IQ_MAX_DESCRIPTORS 1024 -/* Minimum input (Tx) requests to be enqueued to ring doorbell */ -#define OCTEP_VF_DB_MIN 8 -/* Packet threshold for Tx queue interrupt */ -#define OCTEP_VF_IQ_INTR_THRESHOLD 0x0 - -/* Minimum watermark for backpressure */ -#define OCTEP_VF_OQ_WMARK_MIN 256 - -/* Rx Queue: maximum descriptors per ring */ -#define OCTEP_VF_OQ_MAX_DESCRIPTORS 1024 - -/* Rx buffer size: Use page size buffers. - * Build skb from allocated page buffer once the packet is received. - * When a gathered packet is received, make head page as skb head and - * page buffers in consecutive Rx descriptors as fragments. - */ -#define OCTEP_VF_OQ_BUF_SIZE (SKB_WITH_OVERHEAD(PAGE_SIZE)) -#define OCTEP_VF_OQ_PKTS_PER_INTR 128 -#define OCTEP_VF_OQ_REFILL_THRESHOLD (OCTEP_VF_OQ_MAX_DESCRIPTORS / 4) - -#define OCTEP_VF_OQ_INTR_PKT_THRESHOLD 1 -#define OCTEP_VF_OQ_INTR_TIME_THRESHOLD 10 - -#define OCTEP_VF_MSIX_NAME_SIZE (IFNAMSIZ + 32) - -/* Tx Queue wake threshold - * wakeup a stopped Tx queue if minimum 2 descriptors are available. - * Even a skb with fragments consume only one Tx queue descriptor entry. - */ -#define OCTEP_VF_WAKE_QUEUE_THRESHOLD 2 - -/* Minimum MTU supported by Octeon network interface */ -#define OCTEP_VF_MIN_MTU ETH_MIN_MTU -/* Maximum MTU supported by Octeon interface*/ -#define OCTEP_VF_MAX_MTU (10000 - (ETH_HLEN + ETH_FCS_LEN)) -/* Default MTU */ -#define OCTEP_VF_DEFAULT_MTU 1500 - -/* Macros to get octeon config params */ -#define CFG_GET_IQ_CFG(cfg) ((cfg)->iq) -#define CFG_GET_IQ_NUM_DESC(cfg) ((cfg)->iq.num_descs) -#define CFG_GET_IQ_INSTR_TYPE(cfg) ((cfg)->iq.instr_type) -#define CFG_GET_IQ_INSTR_SIZE(cfg) (64) -#define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min) -#define CFG_GET_IQ_INTR_THRESHOLD(cfg) ((cfg)->iq.intr_threshold) - -#define CFG_GET_OQ_NUM_DESC(cfg) ((cfg)->oq.num_descs) -#define CFG_GET_OQ_BUF_SIZE(cfg) ((cfg)->oq.buf_size) -#define CFG_GET_OQ_REFILL_THRESHOLD(cfg) ((cfg)->oq.refill_threshold) -#define CFG_GET_OQ_INTR_PKT(cfg) ((cfg)->oq.oq_intr_pkt) -#define CFG_GET_OQ_INTR_TIME(cfg) ((cfg)->oq.oq_intr_time) -#define CFG_GET_OQ_WMARK(cfg) ((cfg)->oq.wmark) - -#define CFG_GET_PORTS_ACTIVE_IO_RINGS(cfg) ((cfg)->ring_cfg.active_io_rings) -#define CFG_GET_PORTS_MAX_IO_RINGS(cfg) ((cfg)->ring_cfg.max_io_rings) - -#define CFG_GET_CORE_TICS_PER_US(cfg) ((cfg)->core_cfg.core_tics_per_us) -#define CFG_GET_COPROC_TICS_PER_US(cfg) ((cfg)->core_cfg.coproc_tics_per_us) - -#define CFG_GET_IOQ_MSIX(cfg) ((cfg)->msix_cfg.ioq_msix) - -/* Hardware Tx Queue configuration. */ -struct octep_vf_iq_config { - /* Size of the Input queue (number of commands) */ - u16 num_descs; - - /* Command size - 32 or 64 bytes */ - u16 instr_type; - - /* Minimum number of commands pending to be posted to Octeon before driver - * hits the Input queue doorbell. - */ - u16 db_min; - - /* Trigger the IQ interrupt when processed cmd count reaches - * this level. - */ - u32 intr_threshold; -}; - -/* Hardware Rx Queue configuration. */ -struct octep_vf_oq_config { - /* Size of Output queue (number of descriptors) */ - u16 num_descs; - - /* Size of buffer in this Output queue. */ - u16 buf_size; - - /* The number of buffers that were consumed during packet processing - * by the driver on this Output queue before the driver attempts to - * replenish the descriptor ring with new buffers. - */ - u16 refill_threshold; - - /* Interrupt Coalescing (Packet Count). Octeon will interrupt the host - * only if it sent as many packets as specified by this field. - * The driver usually does not use packet count interrupt coalescing. - */ - u32 oq_intr_pkt; - - /* Interrupt Coalescing (Time Interval). Octeon will interrupt the host - * if at least one packet was sent in the time interval specified by - * this field. The driver uses time interval interrupt coalescing by - * default. The time is specified in microseconds. - */ - u32 oq_intr_time; - - /* Water mark for backpressure. - * Output queue sends backpressure signal to source when - * free buffer count falls below wmark. - */ - u32 wmark; -}; - -/* Tx/Rx configuration */ -struct octep_vf_ring_config { - /* Max number of IOQs */ - u16 max_io_rings; - - /* Number of active IOQs */ - u16 active_io_rings; -}; - -/* Octeon MSI-x config. */ -struct octep_vf_msix_config { - /* Number of IOQ interrupts */ - u16 ioq_msix; -}; - -/* Data Structure to hold configuration limits and active config */ -struct octep_vf_config { - /* Input Queue attributes. */ - struct octep_vf_iq_config iq; - - /* Output Queue attributes. */ - struct octep_vf_oq_config oq; - - /* MSI-X interrupt config */ - struct octep_vf_msix_config msix_cfg; - - /* NIC VF ring Configuration */ - struct octep_vf_ring_config ring_cfg; -}; -#endif /* _OCTEP_VF_CONFIG_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c deleted file mode 100644 index 25e6f643cd9f..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c +++ /dev/null @@ -1,307 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" - -static const char octep_vf_gstrings_global_stats[][ETH_GSTRING_LEN] = { - "rx_packets", - "tx_packets", - "rx_bytes", - "tx_bytes", - "rx_alloc_errors", - "tx_busy_errors", - "rx_dropped", - "tx_dropped", - "tx_hw_pkts", - "tx_hw_octs", - "tx_hw_bcast", - "tx_hw_mcast", - "rx_hw_pkts", - "rx_hw_bytes", - "rx_hw_bcast", - "rx_hw_mcast", - "rx_dropped_pkts_fifo_full", - "rx_dropped_bytes_fifo_full", - "rx_err_pkts", -}; - -#define OCTEP_VF_GLOBAL_STATS_CNT (sizeof(octep_vf_gstrings_global_stats) / ETH_GSTRING_LEN) - -static const char octep_vf_gstrings_tx_q_stats[][ETH_GSTRING_LEN] = { - "tx_packets_posted[Q-%u]", - "tx_packets_completed[Q-%u]", - "tx_bytes[Q-%u]", - "tx_busy[Q-%u]", -}; - -#define OCTEP_VF_TX_Q_STATS_CNT (sizeof(octep_vf_gstrings_tx_q_stats) / ETH_GSTRING_LEN) - -static const char octep_vf_gstrings_rx_q_stats[][ETH_GSTRING_LEN] = { - "rx_packets[Q-%u]", - "rx_bytes[Q-%u]", - "rx_alloc_errors[Q-%u]", -}; - -#define OCTEP_VF_RX_Q_STATS_CNT (sizeof(octep_vf_gstrings_rx_q_stats) / ETH_GSTRING_LEN) - -static void octep_vf_get_drvinfo(struct net_device *netdev, - struct ethtool_drvinfo *info) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - - strscpy(info->driver, OCTEP_VF_DRV_NAME, sizeof(info->driver)); - strscpy(info->bus_info, pci_name(oct->pdev), sizeof(info->bus_info)); -} - -static void octep_vf_get_strings(struct net_device *netdev, - u32 stringset, u8 *data) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - char *strings = (char *)data; - int i, j; - - switch (stringset) { - case ETH_SS_STATS: - for (i = 0; i < OCTEP_VF_GLOBAL_STATS_CNT; i++) { - snprintf(strings, ETH_GSTRING_LEN, - octep_vf_gstrings_global_stats[i]); - strings += ETH_GSTRING_LEN; - } - - for (i = 0; i < num_queues; i++) { - for (j = 0; j < OCTEP_VF_TX_Q_STATS_CNT; j++) { - snprintf(strings, ETH_GSTRING_LEN, - octep_vf_gstrings_tx_q_stats[j], i); - strings += ETH_GSTRING_LEN; - } - } - - for (i = 0; i < num_queues; i++) { - for (j = 0; j < OCTEP_VF_RX_Q_STATS_CNT; j++) { - snprintf(strings, ETH_GSTRING_LEN, - octep_vf_gstrings_rx_q_stats[j], i); - strings += ETH_GSTRING_LEN; - } - } - break; - default: - break; - } -} - -static int octep_vf_get_sset_count(struct net_device *netdev, int sset) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - u16 num_queues = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); - - switch (sset) { - case ETH_SS_STATS: - return OCTEP_VF_GLOBAL_STATS_CNT + (num_queues * - (OCTEP_VF_TX_Q_STATS_CNT + OCTEP_VF_RX_Q_STATS_CNT)); - break; - default: - return -EOPNOTSUPP; - } -} - -static void octep_vf_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats *stats, u64 *data) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - struct octep_vf_iface_tx_stats *iface_tx_stats; - struct octep_vf_iface_rx_stats *iface_rx_stats; - u64 rx_alloc_errors, tx_busy_errors; - u64 rx_packets, rx_bytes; - u64 tx_packets, tx_bytes; - int q, i; - - rx_packets = 0; - rx_bytes = 0; - tx_packets = 0; - tx_bytes = 0; - rx_alloc_errors = 0; - tx_busy_errors = 0; - tx_packets = 0; - tx_bytes = 0; - rx_packets = 0; - rx_bytes = 0; - - octep_vf_get_if_stats(oct); - iface_tx_stats = &oct->iface_tx_stats; - iface_rx_stats = &oct->iface_rx_stats; - - for (q = 0; q < oct->num_oqs; q++) { - struct octep_vf_iq *iq = oct->iq[q]; - struct octep_vf_oq *oq = oct->oq[q]; - - tx_packets += iq->stats.instr_completed; - tx_bytes += iq->stats.bytes_sent; - tx_busy_errors += iq->stats.tx_busy; - - rx_packets += oq->stats.packets; - rx_bytes += oq->stats.bytes; - rx_alloc_errors += oq->stats.alloc_failures; - } - i = 0; - data[i++] = rx_packets; - data[i++] = tx_packets; - data[i++] = rx_bytes; - data[i++] = tx_bytes; - data[i++] = rx_alloc_errors; - data[i++] = tx_busy_errors; - data[i++] = iface_rx_stats->dropped_pkts_fifo_full + - iface_rx_stats->err_pkts; - data[i++] = iface_tx_stats->dropped; - data[i++] = iface_tx_stats->pkts; - data[i++] = iface_tx_stats->octs; - data[i++] = iface_tx_stats->bcst; - data[i++] = iface_tx_stats->mcst; - data[i++] = iface_rx_stats->pkts; - data[i++] = iface_rx_stats->octets; - data[i++] = iface_rx_stats->mcast_pkts; - data[i++] = iface_rx_stats->bcast_pkts; - data[i++] = iface_rx_stats->dropped_pkts_fifo_full; - data[i++] = iface_rx_stats->dropped_octets_fifo_full; - data[i++] = iface_rx_stats->err_pkts; - - /* Per Tx Queue stats */ - for (q = 0; q < oct->num_iqs; q++) { - struct octep_vf_iq *iq = oct->iq[q]; - - data[i++] = iq->stats.instr_posted; - data[i++] = iq->stats.instr_completed; - data[i++] = iq->stats.bytes_sent; - data[i++] = iq->stats.tx_busy; - } - - /* Per Rx Queue stats */ - for (q = 0; q < oct->num_oqs; q++) { - struct octep_vf_oq *oq = oct->oq[q]; - - data[i++] = oq->stats.packets; - data[i++] = oq->stats.bytes; - data[i++] = oq->stats.alloc_failures; - } -} - -#define OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(octep_vf_speeds, ksettings, name) \ -{ \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_T)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseT_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_R)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseR_FEC); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_CR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseCR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_KR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseKR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_LR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseLR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_10GBASE_SR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 10000baseSR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_CR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseCR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_KR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseKR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_25GBASE_SR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 25000baseSR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_CR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseCR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_KR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseKR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_LR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseLR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_40GBASE_SR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 40000baseSR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_CR2)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR2_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_KR2)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR2_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_SR2)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR2_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_CR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseCR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_KR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseKR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_LR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseLR_ER_FR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_50GBASE_SR)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 50000baseSR_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_CR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseCR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_KR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseKR4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_LR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseLR4_ER4_Full); \ - if ((octep_vf_speeds) & BIT(OCTEP_VF_LINK_MODE_100GBASE_SR4)) \ - ethtool_link_ksettings_add_link_mode(ksettings, name, 100000baseSR4_Full); \ -} - -static int octep_vf_get_link_ksettings(struct net_device *netdev, - struct ethtool_link_ksettings *cmd) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - struct octep_vf_iface_link_info *link_info; - u32 advertised_modes, supported_modes; - - ethtool_link_ksettings_zero_link_mode(cmd, supported); - ethtool_link_ksettings_zero_link_mode(cmd, advertising); - - octep_vf_get_link_info(oct); - - advertised_modes = oct->link_info.advertised_modes; - supported_modes = oct->link_info.supported_modes; - link_info = &oct->link_info; - - OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(supported_modes, cmd, supported); - OCTEP_VF_SET_ETHTOOL_LINK_MODES_BITMAP(advertised_modes, cmd, advertising); - - if (link_info->autoneg) { - if (link_info->autoneg & OCTEP_VF_LINK_MODE_AUTONEG_SUPPORTED) - ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); - if (link_info->autoneg & OCTEP_VF_LINK_MODE_AUTONEG_ADVERTISED) { - ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); - cmd->base.autoneg = AUTONEG_ENABLE; - } else { - cmd->base.autoneg = AUTONEG_DISABLE; - } - } else { - cmd->base.autoneg = AUTONEG_DISABLE; - } - - cmd->base.port = PORT_FIBRE; - ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); - ethtool_link_ksettings_add_link_mode(cmd, advertising, FIBRE); - - if (netif_carrier_ok(netdev)) { - cmd->base.speed = link_info->speed; - cmd->base.duplex = DUPLEX_FULL; - } else { - cmd->base.speed = SPEED_UNKNOWN; - cmd->base.duplex = DUPLEX_UNKNOWN; - } - return 0; -} - -static const struct ethtool_ops octep_vf_ethtool_ops = { - .get_drvinfo = octep_vf_get_drvinfo, - .get_link = ethtool_op_get_link, - .get_strings = octep_vf_get_strings, - .get_sset_count = octep_vf_get_sset_count, - .get_ethtool_stats = octep_vf_get_ethtool_stats, - .get_link_ksettings = octep_vf_get_link_ksettings, -}; - -void octep_vf_set_ethtool_ops(struct net_device *netdev) -{ - netdev->ethtool_ops = &octep_vf_ethtool_ops; -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c deleted file mode 100644 index 773168772f6a..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.c +++ /dev/null @@ -1,1230 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" - -struct workqueue_struct *octep_vf_wq; - -/* Supported Devices */ -static const struct pci_device_id octep_vf_pci_id_tbl[] = { - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN93_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF95N_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN98_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KA_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KA_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CNF10KB_VF)}, - {PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OCTEP_PCI_DEVICE_ID_CN10KB_VF)}, - {0, }, -}; -MODULE_DEVICE_TABLE(pci, octep_vf_pci_id_tbl); - -MODULE_AUTHOR("Veerasenareddy Burru "); -MODULE_DESCRIPTION(OCTEP_VF_DRV_STRING); -MODULE_LICENSE("GPL"); - -/** - * octep_vf_alloc_ioq_vectors() - Allocate Tx/Rx Queue interrupt info. - * - * @oct: Octeon device private data structure. - * - * Allocate resources to hold per Tx/Rx queue interrupt info. - * This is the information passed to interrupt handler, from which napi poll - * is scheduled and includes quick access to private data of Tx/Rx queue - * corresponding to the interrupt being handled. - * - * Return: 0, on successful allocation of resources for all queue interrupts. - * -1, if failed to allocate any resource. - */ -static int octep_vf_alloc_ioq_vectors(struct octep_vf_device *oct) -{ - struct octep_vf_ioq_vector *ioq_vector; - int i; - - for (i = 0; i < oct->num_oqs; i++) { - oct->ioq_vector[i] = vzalloc(sizeof(*oct->ioq_vector[i])); - if (!oct->ioq_vector[i]) - goto free_ioq_vector; - - ioq_vector = oct->ioq_vector[i]; - ioq_vector->iq = oct->iq[i]; - ioq_vector->oq = oct->oq[i]; - ioq_vector->octep_vf_dev = oct; - } - - dev_info(&oct->pdev->dev, "Allocated %d IOQ vectors\n", oct->num_oqs); - return 0; - -free_ioq_vector: - while (i) { - i--; - vfree(oct->ioq_vector[i]); - oct->ioq_vector[i] = NULL; - } - return -1; -} - -/** - * octep_vf_free_ioq_vectors() - Free Tx/Rx Queue interrupt vector info. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_free_ioq_vectors(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) { - if (oct->ioq_vector[i]) { - vfree(oct->ioq_vector[i]); - oct->ioq_vector[i] = NULL; - } - } - netdev_info(oct->netdev, "Freed IOQ Vectors\n"); -} - -/** - * octep_vf_enable_msix_range() - enable MSI-x interrupts. - * - * @oct: Octeon device private data structure. - * - * Allocate and enable all MSI-x interrupts (queue and non-queue interrupts) - * for the Octeon device. - * - * Return: 0, on successfully enabling all MSI-x interrupts. - * -1, if failed to enable any MSI-x interrupt. - */ -static int octep_vf_enable_msix_range(struct octep_vf_device *oct) -{ - int num_msix, msix_allocated; - int i; - - /* Generic interrupts apart from input/output queues */ - //num_msix = oct->num_oqs + CFG_GET_NON_IOQ_MSIX(oct->conf); - num_msix = oct->num_oqs; - oct->msix_entries = kcalloc(num_msix, sizeof(struct msix_entry), GFP_KERNEL); - if (!oct->msix_entries) - goto msix_alloc_err; - - for (i = 0; i < num_msix; i++) - oct->msix_entries[i].entry = i; - - msix_allocated = pci_enable_msix_range(oct->pdev, oct->msix_entries, - num_msix, num_msix); - if (msix_allocated != num_msix) { - dev_err(&oct->pdev->dev, - "Failed to enable %d msix irqs; got only %d\n", - num_msix, msix_allocated); - goto enable_msix_err; - } - oct->num_irqs = msix_allocated; - dev_info(&oct->pdev->dev, "MSI-X enabled successfully\n"); - - return 0; - -enable_msix_err: - if (msix_allocated > 0) - pci_disable_msix(oct->pdev); - kfree(oct->msix_entries); - oct->msix_entries = NULL; -msix_alloc_err: - return -1; -} - -/** - * octep_vf_disable_msix() - disable MSI-x interrupts. - * - * @oct: Octeon device private data structure. - * - * Disable MSI-x on the Octeon device. - */ -static void octep_vf_disable_msix(struct octep_vf_device *oct) -{ - pci_disable_msix(oct->pdev); - kfree(oct->msix_entries); - oct->msix_entries = NULL; - dev_info(&oct->pdev->dev, "Disabled MSI-X\n"); -} - -/** - * octep_vf_ioq_intr_handler() - handler for all Tx/Rx queue interrupts. - * - * @irq: Interrupt number. - * @data: interrupt data contains pointers to Tx/Rx queue private data - * and correspong NAPI context. - * - * this is common handler for all non-queue (generic) interrupts. - */ -static irqreturn_t octep_vf_ioq_intr_handler(int irq, void *data) -{ - struct octep_vf_ioq_vector *ioq_vector = data; - struct octep_vf_device *oct = ioq_vector->octep_vf_dev; - - return oct->hw_ops.ioq_intr_handler(ioq_vector); -} - -/** - * octep_vf_request_irqs() - Register interrupt handlers. - * - * @oct: Octeon device private data structure. - * - * Register handlers for all queue and non-queue interrupts. - * - * Return: 0, on successful registration of all interrupt handlers. - * -1, on any error. - */ -static int octep_vf_request_irqs(struct octep_vf_device *oct) -{ - struct net_device *netdev = oct->netdev; - struct octep_vf_ioq_vector *ioq_vector; - struct msix_entry *msix_entry; - int ret, i; - - /* Request IRQs for Tx/Rx queues */ - for (i = 0; i < oct->num_oqs; i++) { - ioq_vector = oct->ioq_vector[i]; - msix_entry = &oct->msix_entries[i]; - - snprintf(ioq_vector->name, sizeof(ioq_vector->name), - "%s-q%d", netdev->name, i); - ret = request_irq(msix_entry->vector, - octep_vf_ioq_intr_handler, 0, - ioq_vector->name, ioq_vector); - if (ret) { - netdev_err(netdev, - "request_irq failed for Q-%d; err=%d", - i, ret); - goto ioq_irq_err; - } - - cpumask_set_cpu(i % num_online_cpus(), - &ioq_vector->affinity_mask); - irq_set_affinity_hint(msix_entry->vector, - &ioq_vector->affinity_mask); - } - - return 0; -ioq_irq_err: - while (i) { - --i; - free_irq(oct->msix_entries[i].vector, oct); - } - return -1; -} - -/** - * octep_vf_free_irqs() - free all registered interrupts. - * - * @oct: Octeon device private data structure. - * - * Free all queue and non-queue interrupts of the Octeon device. - */ -static void octep_vf_free_irqs(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_irqs; i++) { - irq_set_affinity_hint(oct->msix_entries[i].vector, NULL); - free_irq(oct->msix_entries[i].vector, oct->ioq_vector[i]); - } - netdev_info(oct->netdev, "IRQs freed\n"); -} - -/** - * octep_vf_setup_irqs() - setup interrupts for the Octeon device. - * - * @oct: Octeon device private data structure. - * - * Allocate data structures to hold per interrupt information, allocate/enable - * MSI-x interrupt and register interrupt handlers. - * - * Return: 0, on successful allocation and registration of all interrupts. - * -1, on any error. - */ -static int octep_vf_setup_irqs(struct octep_vf_device *oct) -{ - if (octep_vf_alloc_ioq_vectors(oct)) - goto ioq_vector_err; - - if (octep_vf_enable_msix_range(oct)) - goto enable_msix_err; - - if (octep_vf_request_irqs(oct)) - goto request_irq_err; - - return 0; - -request_irq_err: - octep_vf_disable_msix(oct); -enable_msix_err: - octep_vf_free_ioq_vectors(oct); -ioq_vector_err: - return -1; -} - -/** - * octep_vf_clean_irqs() - free all interrupts and its resources. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_clean_irqs(struct octep_vf_device *oct) -{ - octep_vf_free_irqs(oct); - octep_vf_disable_msix(oct); - octep_vf_free_ioq_vectors(oct); -} - -/** - * octep_vf_enable_ioq_irq() - Enable MSI-x interrupt of a Tx/Rx queue. - * - * @iq: Octeon Tx queue data structure. - * @oq: Octeon Rx queue data structure. - */ -static void octep_vf_enable_ioq_irq(struct octep_vf_iq *iq, struct octep_vf_oq *oq) -{ - u32 pkts_pend = oq->pkts_pending; - - netdev_dbg(iq->netdev, "enabling intr for Q-%u\n", iq->q_no); - if (iq->pkts_processed) { - writel(iq->pkts_processed, iq->inst_cnt_reg); - iq->pkt_in_done -= iq->pkts_processed; - iq->pkts_processed = 0; - } - if (oq->last_pkt_count - pkts_pend) { - writel(oq->last_pkt_count - pkts_pend, oq->pkts_sent_reg); - oq->last_pkt_count = pkts_pend; - } - - /* Flush the previous wrties before writing to RESEND bit */ - smp_wmb(); - writeq(1UL << OCTEP_VF_OQ_INTR_RESEND_BIT, oq->pkts_sent_reg); - writeq(1UL << OCTEP_VF_IQ_INTR_RESEND_BIT, iq->inst_cnt_reg); -} - -/** - * octep_vf_napi_poll() - NAPI poll function for Tx/Rx. - * - * @napi: pointer to napi context. - * @budget: max number of packets to be processed in single invocation. - */ -static int octep_vf_napi_poll(struct napi_struct *napi, int budget) -{ - struct octep_vf_ioq_vector *ioq_vector = - container_of(napi, struct octep_vf_ioq_vector, napi); - u32 tx_pending, rx_done; - - tx_pending = octep_vf_iq_process_completions(ioq_vector->iq, budget); - rx_done = octep_vf_oq_process_rx(ioq_vector->oq, budget); - - /* need more polling if tx completion processing is still pending or - * processed at least 'budget' number of rx packets. - */ - if (tx_pending || rx_done >= budget) - return budget; - - napi_complete(napi); - octep_vf_enable_ioq_irq(ioq_vector->iq, ioq_vector->oq); - return rx_done; -} - -/** - * octep_vf_napi_add() - Add NAPI poll for all Tx/Rx queues. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_napi_add(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) { - netdev_dbg(oct->netdev, "Adding NAPI on Q-%d\n", i); - netif_napi_add(oct->netdev, &oct->ioq_vector[i]->napi, octep_vf_napi_poll); - oct->oq[i]->napi = &oct->ioq_vector[i]->napi; - } -} - -/** - * octep_vf_napi_delete() - delete NAPI poll callback for all Tx/Rx queues. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_napi_delete(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) { - netdev_dbg(oct->netdev, "Deleting NAPI on Q-%d\n", i); - netif_napi_del(&oct->ioq_vector[i]->napi); - oct->oq[i]->napi = NULL; - } -} - -/** - * octep_vf_napi_enable() - enable NAPI for all Tx/Rx queues. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_napi_enable(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) { - netdev_dbg(oct->netdev, "Enabling NAPI on Q-%d\n", i); - napi_enable(&oct->ioq_vector[i]->napi); - } -} - -/** - * octep_vf_napi_disable() - disable NAPI for all Tx/Rx queues. - * - * @oct: Octeon device private data structure. - */ -static void octep_vf_napi_disable(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) { - netdev_dbg(oct->netdev, "Disabling NAPI on Q-%d\n", i); - napi_disable(&oct->ioq_vector[i]->napi); - } -} - -static void octep_vf_link_up(struct net_device *netdev) -{ - netif_carrier_on(netdev); - netif_tx_start_all_queues(netdev); -} - -static void octep_vf_set_rx_state(struct octep_vf_device *oct, bool up) -{ - int err; - - err = octep_vf_mbox_set_rx_state(oct, up); - if (err) - netdev_err(oct->netdev, "Set Rx state to %d failed with err:%d\n", up, err); -} - -static int octep_vf_get_link_status(struct octep_vf_device *oct) -{ - int err; - - err = octep_vf_mbox_get_link_status(oct, &oct->link_info.oper_up); - if (err) - netdev_err(oct->netdev, "Get link status failed with err:%d\n", err); - return oct->link_info.oper_up; -} - -static void octep_vf_set_link_status(struct octep_vf_device *oct, bool up) -{ - int err; - - err = octep_vf_mbox_set_link_status(oct, up); - if (err) { - netdev_err(oct->netdev, "Set link status to %d failed with err:%d\n", up, err); - return; - } - oct->link_info.oper_up = up; -} - -/** - * octep_vf_open() - start the octeon network device. - * - * @netdev: pointer to kernel network device. - * - * setup Tx/Rx queues, interrupts and enable hardware operation of Tx/Rx queues - * and interrupts.. - * - * Return: 0, on successfully setting up device and bring it up. - * -1, on any error. - */ -static int octep_vf_open(struct net_device *netdev) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - int err, ret; - - netdev_info(netdev, "Starting netdev ...\n"); - netif_carrier_off(netdev); - - oct->hw_ops.reset_io_queues(oct); - - if (octep_vf_setup_iqs(oct)) - goto setup_iq_err; - if (octep_vf_setup_oqs(oct)) - goto setup_oq_err; - if (octep_vf_setup_irqs(oct)) - goto setup_irq_err; - - err = netif_set_real_num_tx_queues(netdev, oct->num_oqs); - if (err) - goto set_queues_err; - err = netif_set_real_num_rx_queues(netdev, oct->num_iqs); - if (err) - goto set_queues_err; - - octep_vf_napi_add(oct); - octep_vf_napi_enable(oct); - - oct->link_info.admin_up = 1; - octep_vf_set_rx_state(oct, true); - - ret = octep_vf_get_link_status(oct); - if (!ret) - octep_vf_set_link_status(oct, true); - - /* Enable the input and output queues for this Octeon device */ - oct->hw_ops.enable_io_queues(oct); - - /* Enable Octeon device interrupts */ - oct->hw_ops.enable_interrupts(oct); - - octep_vf_oq_dbell_init(oct); - - ret = octep_vf_get_link_status(oct); - if (ret) - octep_vf_link_up(netdev); - - return 0; - -set_queues_err: - octep_vf_napi_disable(oct); - octep_vf_napi_delete(oct); - octep_vf_clean_irqs(oct); -setup_irq_err: - octep_vf_free_oqs(oct); -setup_oq_err: - octep_vf_free_iqs(oct); -setup_iq_err: - return -1; -} - -/** - * octep_vf_stop() - stop the octeon network device. - * - * @netdev: pointer to kernel network device. - * - * stop the device Tx/Rx operations, bring down the link and - * free up all resources allocated for Tx/Rx queues and interrupts. - */ -static int octep_vf_stop(struct net_device *netdev) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - - netdev_info(netdev, "Stopping the device ...\n"); - - /* Stop Tx from stack */ - netif_tx_stop_all_queues(netdev); - netif_carrier_off(netdev); - netif_tx_disable(netdev); - - octep_vf_set_link_status(oct, false); - octep_vf_set_rx_state(oct, false); - - oct->link_info.admin_up = 0; - oct->link_info.oper_up = 0; - - oct->hw_ops.disable_interrupts(oct); - octep_vf_napi_disable(oct); - octep_vf_napi_delete(oct); - - octep_vf_clean_irqs(oct); - octep_vf_clean_iqs(oct); - - oct->hw_ops.disable_io_queues(oct); - oct->hw_ops.reset_io_queues(oct); - octep_vf_free_oqs(oct); - octep_vf_free_iqs(oct); - netdev_info(netdev, "Device stopped !!\n"); - return 0; -} - -/** - * octep_vf_iq_full_check() - check if a Tx queue is full. - * - * @iq: Octeon Tx queue data structure. - * - * Return: 0, if the Tx queue is not full. - * 1, if the Tx queue is full. - */ -static int octep_vf_iq_full_check(struct octep_vf_iq *iq) -{ - if (likely((IQ_INSTR_SPACE(iq)) > - OCTEP_VF_WAKE_QUEUE_THRESHOLD)) - return 0; - - /* Stop the queue if unable to send */ - netif_stop_subqueue(iq->netdev, iq->q_no); - - /* check again and restart the queue, in case NAPI has just freed - * enough Tx ring entries. - */ - if (unlikely(IQ_INSTR_SPACE(iq) > - OCTEP_VF_WAKE_QUEUE_THRESHOLD)) { - netif_start_subqueue(iq->netdev, iq->q_no); - iq->stats.restart_cnt++; - return 0; - } - - return 1; -} - -/** - * octep_vf_start_xmit() - Enqueue packet to Octoen hardware Tx Queue. - * - * @skb: packet skbuff pointer. - * @netdev: kernel network device. - * - * Return: NETDEV_TX_BUSY, if Tx Queue is full. - * NETDEV_TX_OK, if successfully enqueued to hardware Tx queue. - */ -static netdev_tx_t octep_vf_start_xmit(struct sk_buff *skb, - struct net_device *netdev) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - netdev_features_t feat = netdev->features; - struct octep_vf_tx_sglist_desc *sglist; - struct octep_vf_tx_buffer *tx_buffer; - struct octep_vf_tx_desc_hw *hw_desc; - struct skb_shared_info *shinfo; - struct octep_vf_instr_hdr *ih; - struct octep_vf_iq *iq; - skb_frag_t *frag; - u16 nr_frags, si; - int xmit_more; - u16 q_no, wi; - - if (skb_put_padto(skb, ETH_ZLEN)) - return NETDEV_TX_OK; - - q_no = skb_get_queue_mapping(skb); - if (q_no >= oct->num_iqs) { - netdev_err(netdev, "Invalid Tx skb->queue_mapping=%d\n", q_no); - q_no = q_no % oct->num_iqs; - } - - iq = oct->iq[q_no]; - if (octep_vf_iq_full_check(iq)) { - iq->stats.tx_busy++; - return NETDEV_TX_BUSY; - } - - shinfo = skb_shinfo(skb); - nr_frags = shinfo->nr_frags; - - wi = iq->host_write_index; - hw_desc = &iq->desc_ring[wi]; - hw_desc->ih64 = 0; - - tx_buffer = iq->buff_info + wi; - tx_buffer->skb = skb; - - ih = &hw_desc->ih; - ih->tlen = skb->len; - ih->pkind = oct->fw_info.pkind; - ih->fsz = oct->fw_info.fsz; - ih->tlen = skb->len + ih->fsz; - - if (!nr_frags) { - tx_buffer->gather = 0; - tx_buffer->dma = dma_map_single(iq->dev, skb->data, - skb->len, DMA_TO_DEVICE); - if (dma_mapping_error(iq->dev, tx_buffer->dma)) - goto dma_map_err; - hw_desc->dptr = tx_buffer->dma; - } else { - /* Scatter/Gather */ - dma_addr_t dma; - u16 len; - - sglist = tx_buffer->sglist; - - ih->gsz = nr_frags + 1; - ih->gather = 1; - tx_buffer->gather = 1; - - len = skb_headlen(skb); - dma = dma_map_single(iq->dev, skb->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(iq->dev, dma)) - goto dma_map_err; - - memset(sglist, 0, OCTEP_VF_SGLIST_SIZE_PER_PKT); - sglist[0].len[3] = len; - sglist[0].dma_ptr[0] = dma; - - si = 1; /* entry 0 is main skb, mapped above */ - frag = &shinfo->frags[0]; - while (nr_frags--) { - len = skb_frag_size(frag); - dma = skb_frag_dma_map(iq->dev, frag, 0, - len, DMA_TO_DEVICE); - if (dma_mapping_error(iq->dev, dma)) - goto dma_map_sg_err; - - sglist[si >> 2].len[3 - (si & 3)] = len; - sglist[si >> 2].dma_ptr[si & 3] = dma; - - frag++; - si++; - } - hw_desc->dptr = tx_buffer->sglist_dma; - } - if (oct->fw_info.tx_ol_flags) { - if ((feat & (NETIF_F_TSO)) && (skb_is_gso(skb))) { - hw_desc->txm.ol_flags = OCTEP_VF_TX_OFFLOAD_CKSUM; - hw_desc->txm.ol_flags |= OCTEP_VF_TX_OFFLOAD_TSO; - hw_desc->txm.gso_size = skb_shinfo(skb)->gso_size; - hw_desc->txm.gso_segs = skb_shinfo(skb)->gso_segs; - } else if (feat & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) { - hw_desc->txm.ol_flags = OCTEP_VF_TX_OFFLOAD_CKSUM; - } - /* due to ESR txm will be swappeed by hw */ - hw_desc->txm64[0] = (__force u64)cpu_to_be64(hw_desc->txm64[0]); - } - - netdev_tx_sent_queue(iq->netdev_q, skb->len); - - xmit_more = netdev_xmit_more(); - - skb_tx_timestamp(skb); - iq->fill_cnt++; - wi++; - iq->host_write_index = wi & iq->ring_size_mask; - if (xmit_more && - (IQ_INSTR_PENDING(iq) < - (iq->max_count - OCTEP_VF_WAKE_QUEUE_THRESHOLD)) && - iq->fill_cnt < iq->fill_threshold) - return NETDEV_TX_OK; - - /* Flush the hw descriptors before writing to doorbell */ - smp_wmb(); - writel(iq->fill_cnt, iq->doorbell_reg); - iq->stats.instr_posted += iq->fill_cnt; - iq->fill_cnt = 0; - return NETDEV_TX_OK; - -dma_map_sg_err: - if (si > 0) { - dma_unmap_single(iq->dev, sglist[0].dma_ptr[0], - sglist[0].len[0], DMA_TO_DEVICE); - sglist[0].len[0] = 0; - } - while (si > 1) { - dma_unmap_page(iq->dev, sglist[si >> 2].dma_ptr[si & 3], - sglist[si >> 2].len[si & 3], DMA_TO_DEVICE); - sglist[si >> 2].len[si & 3] = 0; - si--; - } - tx_buffer->gather = 0; -dma_map_err: - dev_kfree_skb_any(skb); - return NETDEV_TX_OK; -} - -int octep_vf_get_if_stats(struct octep_vf_device *oct) -{ - struct octep_vf_iface_rxtx_stats vf_stats; - int ret, size; - - memset(&vf_stats, 0, sizeof(struct octep_vf_iface_rxtx_stats)); - ret = octep_vf_mbox_bulk_read(oct, OCTEP_PFVF_MBOX_CMD_GET_STATS, - (u8 *)&vf_stats, &size); - if (!ret) { - memcpy(&oct->iface_rx_stats, &vf_stats.iface_rx_stats, - sizeof(struct octep_vf_iface_rx_stats)); - memcpy(&oct->iface_tx_stats, &vf_stats.iface_tx_stats, - sizeof(struct octep_vf_iface_tx_stats)); - } - return ret; -} - -int octep_vf_get_link_info(struct octep_vf_device *oct) -{ - int ret, size; - - ret = octep_vf_mbox_bulk_read(oct, OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO, - (u8 *)&oct->link_info, &size); - if (ret) { - dev_err(&oct->pdev->dev, "Get VF link info failed via VF Mbox\n"); - return ret; - } - return 0; -} - -/** - * octep_vf_get_stats64() - Get Octeon network device statistics. - * - * @netdev: kernel network device. - * @stats: pointer to stats structure to be filled in. - */ -static void octep_vf_get_stats64(struct net_device *netdev, - struct rtnl_link_stats64 *stats) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; - int q; - - tx_packets = 0; - tx_bytes = 0; - rx_packets = 0; - rx_bytes = 0; - for (q = 0; q < oct->num_oqs; q++) { - struct octep_vf_iq *iq = oct->iq[q]; - struct octep_vf_oq *oq = oct->oq[q]; - - tx_packets += iq->stats.instr_completed; - tx_bytes += iq->stats.bytes_sent; - rx_packets += oq->stats.packets; - rx_bytes += oq->stats.bytes; - } - stats->tx_packets = tx_packets; - stats->tx_bytes = tx_bytes; - stats->rx_packets = rx_packets; - stats->rx_bytes = rx_bytes; - if (!octep_vf_get_if_stats(oct)) { - stats->multicast = oct->iface_rx_stats.mcast_pkts; - stats->rx_errors = oct->iface_rx_stats.err_pkts; - } -} - -/** - * octep_vf_tx_timeout_task - work queue task to Handle Tx queue timeout. - * - * @work: pointer to Tx queue timeout work_struct - * - * Stop and start the device so that it frees up all queue resources - * and restarts the queues, that potentially clears a Tx queue timeout - * condition. - **/ -static void octep_vf_tx_timeout_task(struct work_struct *work) -{ - struct octep_vf_device *oct = container_of(work, struct octep_vf_device, - tx_timeout_task); - struct net_device *netdev = oct->netdev; - - rtnl_lock(); - if (netif_running(netdev)) { - octep_vf_stop(netdev); - octep_vf_open(netdev); - } - rtnl_unlock(); -} - -/** - * octep_vf_tx_timeout() - Handle Tx Queue timeout. - * - * @netdev: pointer to kernel network device. - * @txqueue: Timed out Tx queue number. - * - * Schedule a work to handle Tx queue timeout. - */ -static void octep_vf_tx_timeout(struct net_device *netdev, unsigned int txqueue) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - - queue_work(octep_vf_wq, &oct->tx_timeout_task); -} - -static int octep_vf_set_mac(struct net_device *netdev, void *p) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - struct sockaddr *addr = (struct sockaddr *)p; - int err; - - if (!is_valid_ether_addr(addr->sa_data)) - return -EADDRNOTAVAIL; - - err = octep_vf_mbox_set_mac_addr(oct, addr->sa_data); - if (err) - return err; - - memcpy(oct->mac_addr, addr->sa_data, ETH_ALEN); - eth_hw_addr_set(netdev, addr->sa_data); - - return 0; -} - -static int octep_vf_change_mtu(struct net_device *netdev, int new_mtu) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - struct octep_vf_iface_link_info *link_info; - int err; - - link_info = &oct->link_info; - if (link_info->mtu == new_mtu) - return 0; - - err = octep_vf_mbox_set_mtu(oct, new_mtu); - if (!err) { - oct->link_info.mtu = new_mtu; - netdev->mtu = new_mtu; - } - return err; -} - -static int octep_vf_set_features(struct net_device *netdev, - netdev_features_t features) -{ - struct octep_vf_device *oct = netdev_priv(netdev); - u16 rx_offloads = 0, tx_offloads = 0; - int err; - - /* We only support features received from firmware */ - if ((features & netdev->hw_features) != features) - return -EINVAL; - - if (features & NETIF_F_TSO) - tx_offloads |= OCTEP_VF_TX_OFFLOAD_TSO; - - if (features & NETIF_F_TSO6) - tx_offloads |= OCTEP_VF_TX_OFFLOAD_TSO; - - if (features & NETIF_F_IP_CSUM) - tx_offloads |= OCTEP_VF_TX_OFFLOAD_CKSUM; - - if (features & NETIF_F_IPV6_CSUM) - tx_offloads |= OCTEP_VF_TX_OFFLOAD_CKSUM; - - if (features & NETIF_F_RXCSUM) - rx_offloads |= OCTEP_VF_RX_OFFLOAD_CKSUM; - - err = octep_vf_mbox_set_offloads(oct, tx_offloads, rx_offloads); - if (!err) - netdev->features = features; - - return err; -} - -static const struct net_device_ops octep_vf_netdev_ops = { - .ndo_open = octep_vf_open, - .ndo_stop = octep_vf_stop, - .ndo_start_xmit = octep_vf_start_xmit, - .ndo_get_stats64 = octep_vf_get_stats64, - .ndo_tx_timeout = octep_vf_tx_timeout, - .ndo_set_mac_address = octep_vf_set_mac, - .ndo_change_mtu = octep_vf_change_mtu, - .ndo_set_features = octep_vf_set_features, -}; - -static const char *octep_vf_devid_to_str(struct octep_vf_device *oct) -{ - switch (oct->chip_id) { - case OCTEP_PCI_DEVICE_ID_CN93_VF: - return "CN93XX"; - case OCTEP_PCI_DEVICE_ID_CNF95N_VF: - return "CNF95N"; - case OCTEP_PCI_DEVICE_ID_CN10KA_VF: - return "CN10KA"; - case OCTEP_PCI_DEVICE_ID_CNF10KA_VF: - return "CNF10KA"; - case OCTEP_PCI_DEVICE_ID_CNF10KB_VF: - return "CNF10KB"; - case OCTEP_PCI_DEVICE_ID_CN10KB_VF: - return "CN10KB"; - default: - return "Unsupported"; - } -} - -/** - * octep_vf_device_setup() - Setup Octeon Device. - * - * @oct: Octeon device private data structure. - * - * Setup Octeon device hardware operations, configuration, etc ... - */ -int octep_vf_device_setup(struct octep_vf_device *oct) -{ - struct pci_dev *pdev = oct->pdev; - - /* allocate memory for oct->conf */ - oct->conf = kzalloc(sizeof(*oct->conf), GFP_KERNEL); - if (!oct->conf) - return -ENOMEM; - - /* Map BAR region 0 */ - oct->mmio.hw_addr = ioremap(pci_resource_start(oct->pdev, 0), - pci_resource_len(oct->pdev, 0)); - if (!oct->mmio.hw_addr) { - dev_err(&pdev->dev, - "Failed to remap BAR0; start=0x%llx len=0x%llx\n", - pci_resource_start(oct->pdev, 0), - pci_resource_len(oct->pdev, 0)); - goto ioremap_err; - } - oct->mmio.mapped = 1; - - oct->chip_id = pdev->device; - oct->rev_id = pdev->revision; - dev_info(&pdev->dev, "chip_id = 0x%x\n", pdev->device); - - switch (oct->chip_id) { - case OCTEP_PCI_DEVICE_ID_CN93_VF: - case OCTEP_PCI_DEVICE_ID_CNF95N_VF: - case OCTEP_PCI_DEVICE_ID_CN98_VF: - dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n", - octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct), - OCTEP_VF_MINOR_REV(oct)); - octep_vf_device_setup_cn93(oct); - break; - case OCTEP_PCI_DEVICE_ID_CNF10KA_VF: - case OCTEP_PCI_DEVICE_ID_CN10KA_VF: - case OCTEP_PCI_DEVICE_ID_CNF10KB_VF: - case OCTEP_PCI_DEVICE_ID_CN10KB_VF: - dev_info(&pdev->dev, "Setting up OCTEON %s VF PASS%d.%d\n", - octep_vf_devid_to_str(oct), OCTEP_VF_MAJOR_REV(oct), - OCTEP_VF_MINOR_REV(oct)); - octep_vf_device_setup_cnxk(oct); - break; - default: - dev_err(&pdev->dev, "Unsupported device\n"); - goto unsupported_dev; - } - - return 0; - -unsupported_dev: - iounmap(oct->mmio.hw_addr); -ioremap_err: - kfree(oct->conf); - return -EOPNOTSUPP; -} - -/** - * octep_vf_device_cleanup() - Cleanup Octeon Device. - * - * @oct: Octeon device private data structure. - * - * Cleanup Octeon device allocated resources. - */ -static void octep_vf_device_cleanup(struct octep_vf_device *oct) -{ - dev_info(&oct->pdev->dev, "Cleaning up Octeon Device ...\n"); - - if (oct->mmio.mapped) - iounmap(oct->mmio.hw_addr); - - kfree(oct->conf); - oct->conf = NULL; -} - -static int octep_vf_get_mac_addr(struct octep_vf_device *oct, u8 *addr) -{ - return octep_vf_mbox_get_mac_addr(oct, addr); -} - -/** - * octep_vf_probe() - Octeon PCI device probe handler. - * - * @pdev: PCI device structure. - * @ent: entry in Octeon PCI device ID table. - * - * Initializes and enables the Octeon PCI device for network operations. - * Initializes Octeon private data structure and registers a network device. - */ -static int octep_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - struct octep_vf_device *octep_vf_dev; - struct net_device *netdev; - int err; - - err = pci_enable_device(pdev); - if (err) { - dev_err(&pdev->dev, "Failed to enable PCI device\n"); - return err; - } - - err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); - if (err) { - dev_err(&pdev->dev, "Failed to set DMA mask !!\n"); - goto err_dma_mask; - } - - err = pci_request_mem_regions(pdev, OCTEP_VF_DRV_NAME); - if (err) { - dev_err(&pdev->dev, "Failed to map PCI memory regions\n"); - goto err_pci_regions; - } - - pci_set_master(pdev); - - netdev = alloc_etherdev_mq(sizeof(struct octep_vf_device), - OCTEP_VF_MAX_QUEUES); - if (!netdev) { - dev_err(&pdev->dev, "Failed to allocate netdev\n"); - err = -ENOMEM; - goto err_alloc_netdev; - } - SET_NETDEV_DEV(netdev, &pdev->dev); - - octep_vf_dev = netdev_priv(netdev); - octep_vf_dev->netdev = netdev; - octep_vf_dev->pdev = pdev; - octep_vf_dev->dev = &pdev->dev; - pci_set_drvdata(pdev, octep_vf_dev); - - err = octep_vf_device_setup(octep_vf_dev); - if (err) { - dev_err(&pdev->dev, "Device setup failed\n"); - goto err_octep_vf_config; - } - INIT_WORK(&octep_vf_dev->tx_timeout_task, octep_vf_tx_timeout_task); - - netdev->netdev_ops = &octep_vf_netdev_ops; - octep_vf_set_ethtool_ops(netdev); - netif_carrier_off(netdev); - - if (octep_vf_setup_mbox(octep_vf_dev)) { - dev_err(&pdev->dev, "VF Mailbox setup failed\n"); - err = -ENOMEM; - goto err_setup_mbox; - } - - if (octep_vf_mbox_version_check(octep_vf_dev)) { - dev_err(&pdev->dev, "PF VF Mailbox version mismatch\n"); - err = -EINVAL; - goto err_mbox_version; - } - - if (octep_vf_mbox_get_fw_info(octep_vf_dev)) { - dev_err(&pdev->dev, "unable to get fw info\n"); - err = -EINVAL; - goto err_mbox_version; - } - - netdev->hw_features = NETIF_F_SG; - if (OCTEP_VF_TX_IP_CSUM(octep_vf_dev->fw_info.tx_ol_flags)) - netdev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); - - if (OCTEP_VF_RX_IP_CSUM(octep_vf_dev->fw_info.rx_ol_flags)) - netdev->hw_features |= NETIF_F_RXCSUM; - - netdev->min_mtu = OCTEP_VF_MIN_MTU; - netdev->max_mtu = OCTEP_VF_MAX_MTU; - netdev->mtu = OCTEP_VF_DEFAULT_MTU; - - if (OCTEP_VF_TX_TSO(octep_vf_dev->fw_info.tx_ol_flags)) { - netdev->hw_features |= NETIF_F_TSO; - netif_set_tso_max_size(netdev, netdev->max_mtu); - } - - netdev->features |= netdev->hw_features; - octep_vf_get_mac_addr(octep_vf_dev, octep_vf_dev->mac_addr); - eth_hw_addr_set(netdev, octep_vf_dev->mac_addr); - err = register_netdev(netdev); - if (err) { - dev_err(&pdev->dev, "Failed to register netdev\n"); - goto err_register_dev; - } - dev_info(&pdev->dev, "Device probe successful\n"); - return 0; - -err_register_dev: -err_mbox_version: - octep_vf_delete_mbox(octep_vf_dev); -err_setup_mbox: - octep_vf_device_cleanup(octep_vf_dev); -err_octep_vf_config: - free_netdev(netdev); -err_alloc_netdev: - pci_release_mem_regions(pdev); -err_pci_regions: -err_dma_mask: - pci_disable_device(pdev); - dev_err(&pdev->dev, "Device probe failed\n"); - return err; -} - -/** - * octep_vf_remove() - Remove Octeon PCI device from driver control. - * - * @pdev: PCI device structure of the Octeon device. - * - * Cleanup all resources allocated for the Octeon device. - * Unregister from network device and disable the PCI device. - */ -static void octep_vf_remove(struct pci_dev *pdev) -{ - struct octep_vf_device *oct = pci_get_drvdata(pdev); - struct net_device *netdev; - - if (!oct) - return; - - octep_vf_mbox_dev_remove(oct); - cancel_work_sync(&oct->tx_timeout_task); - netdev = oct->netdev; - if (netdev->reg_state == NETREG_REGISTERED) - unregister_netdev(netdev); - octep_vf_delete_mbox(oct); - octep_vf_device_cleanup(oct); - pci_release_mem_regions(pdev); - free_netdev(netdev); - pci_disable_device(pdev); -} - -static struct pci_driver octep_vf_driver = { - .name = OCTEP_VF_DRV_NAME, - .id_table = octep_vf_pci_id_tbl, - .probe = octep_vf_probe, - .remove = octep_vf_remove, -}; - -/** - * octep_vf_init_module() - Module initialization. - * - * create common resource for the driver and register PCI driver. - */ -static int __init octep_vf_init_module(void) -{ - int ret; - - pr_info("%s: Loading %s ...\n", OCTEP_VF_DRV_NAME, OCTEP_VF_DRV_STRING); - - /* work queue for all deferred tasks */ - octep_vf_wq = create_singlethread_workqueue(OCTEP_VF_DRV_NAME); - if (!octep_vf_wq) { - pr_err("%s: Failed to create common workqueue\n", - OCTEP_VF_DRV_NAME); - return -ENOMEM; - } - - ret = pci_register_driver(&octep_vf_driver); - if (ret < 0) { - pr_err("%s: Failed to register PCI driver; err=%d\n", - OCTEP_VF_DRV_NAME, ret); - return ret; - } - - pr_info("%s: Loaded successfully !\n", OCTEP_VF_DRV_NAME); - - return ret; -} - -/** - * octep_vf_exit_module() - Module exit routine. - * - * unregister the driver with PCI subsystem and cleanup common resources. - */ -static void __exit octep_vf_exit_module(void) -{ - pr_info("%s: Unloading ...\n", OCTEP_VF_DRV_NAME); - - pci_unregister_driver(&octep_vf_driver); - destroy_workqueue(octep_vf_wq); - - pr_info("%s: Unloading complete\n", OCTEP_VF_DRV_NAME); -} - -module_init(octep_vf_init_module); -module_exit(octep_vf_exit_module); diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h deleted file mode 100644 index 599e1c7fc651..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_main.h +++ /dev/null @@ -1,338 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#ifndef _OCTEP_VF_MAIN_H_ -#define _OCTEP_VF_MAIN_H_ - -#include "octep_vf_tx.h" -#include "octep_vf_rx.h" -#include "octep_vf_mbox.h" - -#define OCTEP_VF_DRV_NAME "octeon_ep_vf" -#define OCTEP_VF_DRV_STRING "Marvell Octeon EndPoint NIC VF Driver" - -#define OCTEP_PCI_DEVICE_ID_CN93_VF 0xB203 //93xx VF -#define OCTEP_PCI_DEVICE_ID_CNF95N_VF 0xB403 //95N VF -#define OCTEP_PCI_DEVICE_ID_CN98_VF 0xB103 -#define OCTEP_PCI_DEVICE_ID_CN10KA_VF 0xB903 -#define OCTEP_PCI_DEVICE_ID_CNF10KA_VF 0xBA03 -#define OCTEP_PCI_DEVICE_ID_CNF10KB_VF 0xBC03 -#define OCTEP_PCI_DEVICE_ID_CN10KB_VF 0xBD03 - -#define OCTEP_VF_MAX_QUEUES 63 -#define OCTEP_VF_MAX_IQ OCTEP_VF_MAX_QUEUES -#define OCTEP_VF_MAX_OQ OCTEP_VF_MAX_QUEUES - -#define OCTEP_VF_MAX_MSIX_VECTORS OCTEP_VF_MAX_OQ - -#define OCTEP_VF_IQ_INTR_RESEND_BIT 59 -#define OCTEP_VF_OQ_INTR_RESEND_BIT 59 - -#define IQ_INSTR_PENDING(iq) ({ typeof(iq) iq__ = (iq); \ - ((iq__)->host_write_index - (iq__)->flush_index) & \ - (iq__)->ring_size_mask; \ - }) -#define IQ_INSTR_SPACE(iq) ({ typeof(iq) iq_ = (iq); \ - (iq_)->max_count - IQ_INSTR_PENDING(iq_); \ - }) - -#ifndef UINT64_MAX -#define UINT64_MAX ((u64)(~((u64)0))) /* 0xFFFFFFFFFFFFFFFF */ -#endif - -/* PCI address space mapping information. - * Each of the 3 address spaces given by BAR0, BAR2 and BAR4 of - * Octeon gets mapped to different physical address spaces in - * the kernel. - */ -struct octep_vf_mmio { - /* The physical address to which the PCI address space is mapped. */ - u8 __iomem *hw_addr; - - /* Flag indicating the mapping was successful. */ - int mapped; -}; - -struct octep_vf_hw_ops { - void (*setup_iq_regs)(struct octep_vf_device *oct, int q); - void (*setup_oq_regs)(struct octep_vf_device *oct, int q); - void (*setup_mbox_regs)(struct octep_vf_device *oct, int mbox); - - irqreturn_t (*non_ioq_intr_handler)(void *ioq_vector); - irqreturn_t (*ioq_intr_handler)(void *ioq_vector); - void (*reinit_regs)(struct octep_vf_device *oct); - u32 (*update_iq_read_idx)(struct octep_vf_iq *iq); - - void (*enable_interrupts)(struct octep_vf_device *oct); - void (*disable_interrupts)(struct octep_vf_device *oct); - - void (*enable_io_queues)(struct octep_vf_device *oct); - void (*disable_io_queues)(struct octep_vf_device *oct); - void (*enable_iq)(struct octep_vf_device *oct, int q); - void (*disable_iq)(struct octep_vf_device *oct, int q); - void (*enable_oq)(struct octep_vf_device *oct, int q); - void (*disable_oq)(struct octep_vf_device *oct, int q); - void (*reset_io_queues)(struct octep_vf_device *oct); - void (*dump_registers)(struct octep_vf_device *oct); -}; - -/* Octeon mailbox data */ -struct octep_vf_mbox_data { - /* Holds the offset of received data via mailbox. */ - u32 data_index; - - /* Holds the received data via mailbox. */ - u8 recv_data[OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE]; -}; - -/* wrappers around work structs */ -struct octep_vf_mbox_wk { - struct work_struct work; - void *ctxptr; -}; - -/* Octeon device mailbox */ -struct octep_vf_mbox { - /* A mutex to protect access to this q_mbox. */ - struct mutex lock; - - u32 state; - - /* SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */ - u8 __iomem *mbox_int_reg; - - /* SLI_PKT_PF_VF_MBOX_SIG(0) for PF, - * SLI_PKT_PF_VF_MBOX_SIG(1) for VF. - */ - u8 __iomem *mbox_write_reg; - - /* SLI_PKT_PF_VF_MBOX_SIG(1) for PF, - * SLI_PKT_PF_VF_MBOX_SIG(0) for VF. - */ - u8 __iomem *mbox_read_reg; - - /* Octeon mailbox data */ - struct octep_vf_mbox_data mbox_data; - - /* Octeon mailbox work handler to process Mbox messages */ - struct octep_vf_mbox_wk wk; -}; - -/* Tx/Rx queue vector per interrupt. */ -struct octep_vf_ioq_vector { - char name[OCTEP_VF_MSIX_NAME_SIZE]; - struct napi_struct napi; - struct octep_vf_device *octep_vf_dev; - struct octep_vf_iq *iq; - struct octep_vf_oq *oq; - cpumask_t affinity_mask; -}; - -/* Octeon hardware/firmware offload capability flags. */ -#define OCTEP_VF_CAP_TX_CHECKSUM BIT(0) -#define OCTEP_VF_CAP_RX_CHECKSUM BIT(1) -#define OCTEP_VF_CAP_TSO BIT(2) - -/* Link modes */ -enum octep_vf_link_mode_bit_indices { - OCTEP_VF_LINK_MODE_10GBASE_T = 0, - OCTEP_VF_LINK_MODE_10GBASE_R, - OCTEP_VF_LINK_MODE_10GBASE_CR, - OCTEP_VF_LINK_MODE_10GBASE_KR, - OCTEP_VF_LINK_MODE_10GBASE_LR, - OCTEP_VF_LINK_MODE_10GBASE_SR, - OCTEP_VF_LINK_MODE_25GBASE_CR, - OCTEP_VF_LINK_MODE_25GBASE_KR, - OCTEP_VF_LINK_MODE_25GBASE_SR, - OCTEP_VF_LINK_MODE_40GBASE_CR4, - OCTEP_VF_LINK_MODE_40GBASE_KR4, - OCTEP_VF_LINK_MODE_40GBASE_LR4, - OCTEP_VF_LINK_MODE_40GBASE_SR4, - OCTEP_VF_LINK_MODE_50GBASE_CR2, - OCTEP_VF_LINK_MODE_50GBASE_KR2, - OCTEP_VF_LINK_MODE_50GBASE_SR2, - OCTEP_VF_LINK_MODE_50GBASE_CR, - OCTEP_VF_LINK_MODE_50GBASE_KR, - OCTEP_VF_LINK_MODE_50GBASE_LR, - OCTEP_VF_LINK_MODE_50GBASE_SR, - OCTEP_VF_LINK_MODE_100GBASE_CR4, - OCTEP_VF_LINK_MODE_100GBASE_KR4, - OCTEP_VF_LINK_MODE_100GBASE_LR4, - OCTEP_VF_LINK_MODE_100GBASE_SR4, - OCTEP_VF_LINK_MODE_NBITS -}; - -/* Hardware interface link state information. */ -struct octep_vf_iface_link_info { - /* Bitmap of Supported link speeds/modes. */ - u64 supported_modes; - - /* Bitmap of Advertised link speeds/modes. */ - u64 advertised_modes; - - /* Negotiated link speed in Mbps. */ - u32 speed; - - /* MTU */ - u16 mtu; - - /* Autonegotiation state. */ -#define OCTEP_VF_LINK_MODE_AUTONEG_SUPPORTED BIT(0) -#define OCTEP_VF_LINK_MODE_AUTONEG_ADVERTISED BIT(1) - u8 autoneg; - - /* Pause frames setting. */ -#define OCTEP_VF_LINK_MODE_PAUSE_SUPPORTED BIT(0) -#define OCTEP_VF_LINK_MODE_PAUSE_ADVERTISED BIT(1) - u8 pause; - - /* Admin state of the link (ifconfig up/down */ - u8 admin_up; - - /* Operational state of the link: physical link is up down */ - u8 oper_up; -}; - -/* Hardware interface stats information. */ -struct octep_vf_iface_rxtx_stats { - /* Hardware Interface Rx statistics */ - struct octep_vf_iface_rx_stats iface_rx_stats; - - /* Hardware Interface Tx statistics */ - struct octep_vf_iface_tx_stats iface_tx_stats; -}; - -struct octep_vf_fw_info { - /* pkind value to be used in every Tx hardware descriptor */ - u8 pkind; - /* front size data */ - u8 fsz; - /* supported rx offloads OCTEP_VF_RX_OFFLOAD_* */ - u16 rx_ol_flags; - /* supported tx offloads OCTEP_VF_TX_OFFLOAD_* */ - u16 tx_ol_flags; -}; - -/* The Octeon device specific private data structure. - * Each Octeon device has this structure to represent all its components. - */ -struct octep_vf_device { - struct octep_vf_config *conf; - - /* Octeon Chip type. */ - u16 chip_id; - u16 rev_id; - - /* Device capabilities enabled */ - u64 caps_enabled; - /* Device capabilities supported */ - u64 caps_supported; - - /* Pointer to basic Linux device */ - struct device *dev; - /* Linux PCI device pointer */ - struct pci_dev *pdev; - /* Netdev corresponding to the Octeon device */ - struct net_device *netdev; - - /* memory mapped io range */ - struct octep_vf_mmio mmio; - - /* MAC address */ - u8 mac_addr[ETH_ALEN]; - - /* Tx queues (IQ: Instruction Queue) */ - u16 num_iqs; - /* Pointers to Octeon Tx queues */ - struct octep_vf_iq *iq[OCTEP_VF_MAX_IQ]; - - /* Rx queues (OQ: Output Queue) */ - u16 num_oqs; - /* Pointers to Octeon Rx queues */ - struct octep_vf_oq *oq[OCTEP_VF_MAX_OQ]; - - /* Hardware port number of the PCIe interface */ - u16 pcie_port; - - /* Hardware operations */ - struct octep_vf_hw_ops hw_ops; - - /* IRQ info */ - u16 num_irqs; - u16 num_non_ioq_irqs; - char *non_ioq_irq_names; - struct msix_entry *msix_entries; - /* IOq information of it's corresponding MSI-X interrupt. */ - struct octep_vf_ioq_vector *ioq_vector[OCTEP_VF_MAX_QUEUES]; - - /* Hardware Interface Tx statistics */ - struct octep_vf_iface_tx_stats iface_tx_stats; - /* Hardware Interface Rx statistics */ - struct octep_vf_iface_rx_stats iface_rx_stats; - - /* Hardware Interface Link info like supported modes, aneg support */ - struct octep_vf_iface_link_info link_info; - - /* Mailbox to talk to VFs */ - struct octep_vf_mbox *mbox; - - /* Work entry to handle Tx timeout */ - struct work_struct tx_timeout_task; - - /* offset for iface stats */ - u32 ctrl_mbox_ifstats_offset; - - /* Negotiated Mbox version */ - u32 mbox_neg_ver; - - /* firmware info */ - struct octep_vf_fw_info fw_info; -}; - -static inline u16 OCTEP_VF_MAJOR_REV(struct octep_vf_device *oct) -{ - u16 rev = (oct->rev_id & 0xC) >> 2; - - return (rev == 0) ? 1 : rev; -} - -static inline u16 OCTEP_VF_MINOR_REV(struct octep_vf_device *oct) -{ - return (oct->rev_id & 0x3); -} - -/* Octeon CSR read/write access APIs */ -#define octep_vf_write_csr(octep_vf_dev, reg_off, value) \ - writel(value, (octep_vf_dev)->mmio.hw_addr + (reg_off)) - -#define octep_vf_write_csr64(octep_vf_dev, reg_off, val64) \ - writeq(val64, (octep_vf_dev)->mmio.hw_addr + (reg_off)) - -#define octep_vf_read_csr(octep_vf_dev, reg_off) \ - readl((octep_vf_dev)->mmio.hw_addr + (reg_off)) - -#define octep_vf_read_csr64(octep_vf_dev, reg_off) \ - readq((octep_vf_dev)->mmio.hw_addr + (reg_off)) - -extern struct workqueue_struct *octep_vf_wq; - -int octep_vf_device_setup(struct octep_vf_device *oct); -int octep_vf_setup_iqs(struct octep_vf_device *oct); -void octep_vf_free_iqs(struct octep_vf_device *oct); -void octep_vf_clean_iqs(struct octep_vf_device *oct); -int octep_vf_setup_oqs(struct octep_vf_device *oct); -void octep_vf_free_oqs(struct octep_vf_device *oct); -void octep_vf_oq_dbell_init(struct octep_vf_device *oct); -void octep_vf_device_setup_cn93(struct octep_vf_device *oct); -void octep_vf_device_setup_cnxk(struct octep_vf_device *oct); -int octep_vf_iq_process_completions(struct octep_vf_iq *iq, u16 budget); -int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget); -void octep_vf_set_ethtool_ops(struct net_device *netdev); -int octep_vf_get_link_info(struct octep_vf_device *oct); -int octep_vf_get_if_stats(struct octep_vf_device *oct); -void octep_vf_mbox_work(struct work_struct *work); -#endif /* _OCTEP_VF_MAIN_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c deleted file mode 100644 index 594bee15bd36..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.c +++ /dev/null @@ -1,430 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ -#include -#include -#include -#include "octep_vf_config.h" -#include "octep_vf_main.h" - -/* When a new command is implemented, the below table should be updated - * with new command and it's version info. - */ -static u32 pfvf_cmd_versions[OCTEP_PFVF_MBOX_CMD_MAX] = { - [0 ... OCTEP_PFVF_MBOX_CMD_DEV_REMOVE] = OCTEP_PFVF_MBOX_VERSION_V1, - [OCTEP_PFVF_MBOX_CMD_GET_FW_INFO ... OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS] = - OCTEP_PFVF_MBOX_VERSION_V2 -}; - -int octep_vf_setup_mbox(struct octep_vf_device *oct) -{ - int ring = 0; - - oct->mbox = vzalloc(sizeof(*oct->mbox)); - if (!oct->mbox) - return -1; - - mutex_init(&oct->mbox->lock); - - oct->hw_ops.setup_mbox_regs(oct, ring); - INIT_WORK(&oct->mbox->wk.work, octep_vf_mbox_work); - oct->mbox->wk.ctxptr = oct; - oct->mbox_neg_ver = OCTEP_PFVF_MBOX_VERSION_CURRENT; - dev_info(&oct->pdev->dev, "setup vf mbox successfully\n"); - return 0; -} - -void octep_vf_delete_mbox(struct octep_vf_device *oct) -{ - if (oct->mbox) { - if (work_pending(&oct->mbox->wk.work)) - cancel_work_sync(&oct->mbox->wk.work); - - mutex_destroy(&oct->mbox->lock); - vfree(oct->mbox); - oct->mbox = NULL; - dev_info(&oct->pdev->dev, "Deleted vf mbox successfully\n"); - } -} - -int octep_vf_mbox_version_check(struct octep_vf_device *oct) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_version.opcode = OCTEP_PFVF_MBOX_CMD_VERSION; - cmd.s_version.version = OCTEP_PFVF_MBOX_VERSION_CURRENT; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret == OCTEP_PFVF_MBOX_CMD_STATUS_NACK) { - dev_err(&oct->pdev->dev, - "VF Mbox version is incompatible with PF\n"); - return -EINVAL; - } - oct->mbox_neg_ver = (u32)rsp.s_version.version; - dev_dbg(&oct->pdev->dev, - "VF Mbox version:%u Negotiated VF version with PF:%u\n", - (u32)cmd.s_version.version, - (u32)rsp.s_version.version); - return 0; -} - -void octep_vf_mbox_work(struct work_struct *work) -{ - struct octep_vf_mbox_wk *wk = container_of(work, struct octep_vf_mbox_wk, work); - struct octep_vf_iface_link_info *link_info; - struct octep_vf_device *oct = NULL; - struct octep_vf_mbox *mbox = NULL; - union octep_pfvf_mbox_word *notif; - u64 pf_vf_data; - - oct = (struct octep_vf_device *)wk->ctxptr; - link_info = &oct->link_info; - mbox = oct->mbox; - pf_vf_data = readq(mbox->mbox_read_reg); - - notif = (union octep_pfvf_mbox_word *)&pf_vf_data; - - switch (notif->s.opcode) { - case OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS: - if (notif->s_link_status.status) { - link_info->oper_up = OCTEP_PFVF_LINK_STATUS_UP; - netif_carrier_on(oct->netdev); - dev_info(&oct->pdev->dev, "netif_carrier_on\n"); - } else { - link_info->oper_up = OCTEP_PFVF_LINK_STATUS_DOWN; - netif_carrier_off(oct->netdev); - dev_info(&oct->pdev->dev, "netif_carrier_off\n"); - } - break; - default: - dev_err(&oct->pdev->dev, - "Received unsupported notif %d\n", notif->s.opcode); - break; - } -} - -static int __octep_vf_mbox_send_cmd(struct octep_vf_device *oct, - union octep_pfvf_mbox_word cmd, - union octep_pfvf_mbox_word *rsp) -{ - struct octep_vf_mbox *mbox = oct->mbox; - u64 reg_val = 0ull; - int count = 0; - - if (!mbox) - return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP; - - cmd.s.type = OCTEP_PFVF_MBOX_TYPE_CMD; - writeq(cmd.u64, mbox->mbox_write_reg); - - /* No response for notification messages */ - if (!rsp) - return 0; - - for (count = 0; count < OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT; count++) { - usleep_range(1000, 1500); - reg_val = readq(mbox->mbox_write_reg); - if (reg_val != cmd.u64) { - rsp->u64 = reg_val; - break; - } - } - if (count == OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT) { - dev_err(&oct->pdev->dev, "mbox send command timed out\n"); - return OCTEP_PFVF_MBOX_CMD_STATUS_TIMEDOUT; - } - if (rsp->s.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "mbox_send: Received NACK\n"); - return OCTEP_PFVF_MBOX_CMD_STATUS_NACK; - } - rsp->u64 = reg_val; - return 0; -} - -int octep_vf_mbox_send_cmd(struct octep_vf_device *oct, union octep_pfvf_mbox_word cmd, - union octep_pfvf_mbox_word *rsp) -{ - struct octep_vf_mbox *mbox = oct->mbox; - int ret; - - if (!mbox) - return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP; - mutex_lock(&mbox->lock); - if (pfvf_cmd_versions[cmd.s.opcode] > oct->mbox_neg_ver) { - dev_dbg(&oct->pdev->dev, "CMD:%d not supported in Version:%d\n", - cmd.s.opcode, oct->mbox_neg_ver); - mutex_unlock(&mbox->lock); - return -EOPNOTSUPP; - } - ret = __octep_vf_mbox_send_cmd(oct, cmd, rsp); - mutex_unlock(&mbox->lock); - return ret; -} - -int octep_vf_mbox_bulk_read(struct octep_vf_device *oct, enum octep_pfvf_mbox_opcode opcode, - u8 *data, int *size) -{ - struct octep_vf_mbox *mbox = oct->mbox; - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int data_len = 0, tmp_len = 0; - int read_cnt, i = 0, ret; - - if (!mbox) - return OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP; - - mutex_lock(&mbox->lock); - cmd.u64 = 0; - cmd.s_data.opcode = opcode; - cmd.s_data.frag = 0; - /* Send cmd to read data from PF */ - ret = __octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "send mbox cmd fail for data request\n"); - mutex_unlock(&mbox->lock); - return ret; - } - /* PF sends the data length of requested CMD - * in ACK - */ - data_len = *((int32_t *)rsp.s_data.data); - tmp_len = data_len; - cmd.u64 = 0; - rsp.u64 = 0; - cmd.s_data.opcode = opcode; - cmd.s_data.frag = 1; - while (data_len) { - ret = __octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "send mbox cmd fail for data request\n"); - mutex_unlock(&mbox->lock); - mbox->mbox_data.data_index = 0; - memset(mbox->mbox_data.recv_data, 0, OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE); - return ret; - } - if (data_len > OCTEP_PFVF_MBOX_MAX_DATA_SIZE) { - data_len -= OCTEP_PFVF_MBOX_MAX_DATA_SIZE; - read_cnt = OCTEP_PFVF_MBOX_MAX_DATA_SIZE; - } else { - read_cnt = data_len; - data_len = 0; - } - for (i = 0; i < read_cnt; i++) { - mbox->mbox_data.recv_data[mbox->mbox_data.data_index] = - rsp.s_data.data[i]; - mbox->mbox_data.data_index++; - } - cmd.u64 = 0; - rsp.u64 = 0; - cmd.s_data.opcode = opcode; - cmd.s_data.frag = 1; - } - memcpy(data, mbox->mbox_data.recv_data, tmp_len); - *size = tmp_len; - mbox->mbox_data.data_index = 0; - memset(mbox->mbox_data.recv_data, 0, OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE); - mutex_unlock(&mbox->lock); - return 0; -} - -int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu) -{ - int frame_size = mtu + ETH_HLEN + ETH_FCS_LEN; - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret = 0; - - if (mtu < ETH_MIN_MTU || frame_size > ETH_MAX_MTU) { - dev_err(&oct->pdev->dev, - "Failed to set MTU to %d MIN MTU:%d MAX MTU:%d\n", - mtu, ETH_MIN_MTU, ETH_MAX_MTU); - return -EINVAL; - } - - cmd.u64 = 0; - cmd.s_set_mtu.opcode = OCTEP_PFVF_MBOX_CMD_SET_MTU; - cmd.s_set_mtu.mtu = mtu; - - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Mbox send failed; err=%d\n", ret); - return ret; - } - if (rsp.s_set_mtu.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Received Mbox NACK from PF for MTU:%d\n", mtu); - return -EINVAL; - } - - return 0; -} - -int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int i, ret; - - cmd.u64 = 0; - cmd.s_set_mac.opcode = OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR; - for (i = 0; i < ETH_ALEN; i++) - cmd.s_set_mac.mac_addr[i] = mac_addr[i]; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Mbox send failed; err = %d\n", ret); - return ret; - } - if (rsp.s_set_mac.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "received NACK\n"); - return -EINVAL; - } - return 0; -} - -int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int i, ret; - - cmd.u64 = 0; - cmd.s_set_mac.opcode = OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "get_mac: mbox send failed; err = %d\n", ret); - return ret; - } - if (rsp.s_set_mac.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "get_mac: received NACK\n"); - return -EINVAL; - } - for (i = 0; i < ETH_ALEN; i++) - mac_addr[i] = rsp.s_set_mac.mac_addr[i]; - return 0; -} - -int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_link_state.opcode = OCTEP_PFVF_MBOX_CMD_SET_RX_STATE; - cmd.s_link_state.state = state; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Set Rx state via VF Mbox send failed\n"); - return ret; - } - if (rsp.s_link_state.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Set Rx state received NACK\n"); - return -EINVAL; - } - return 0; -} - -int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_link_status.opcode = OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS; - cmd.s_link_status.status = status; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Set link status via VF Mbox send failed\n"); - return ret; - } - if (rsp.s_link_status.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Set link status received NACK\n"); - return -EINVAL; - } - return 0; -} - -int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_link_status.opcode = OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Get link status via VF Mbox send failed\n"); - return ret; - } - if (rsp.s_link_status.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Get link status received NACK\n"); - return -EINVAL; - } - *oper_up = rsp.s_link_status.status; - return 0; -} - -int octep_vf_mbox_dev_remove(struct octep_vf_device *oct) -{ - union octep_pfvf_mbox_word cmd; - int ret; - - cmd.u64 = 0; - cmd.s.opcode = OCTEP_PFVF_MBOX_CMD_DEV_REMOVE; - ret = octep_vf_mbox_send_cmd(oct, cmd, NULL); - return ret; -} - -int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_fw_info.opcode = OCTEP_PFVF_MBOX_CMD_GET_FW_INFO; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Get link status via VF Mbox send failed\n"); - return ret; - } - if (rsp.s_fw_info.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Get link status received NACK\n"); - return -EINVAL; - } - oct->fw_info.pkind = rsp.s_fw_info.pkind; - oct->fw_info.fsz = rsp.s_fw_info.fsz; - oct->fw_info.rx_ol_flags = rsp.s_fw_info.rx_ol_flags; - oct->fw_info.tx_ol_flags = rsp.s_fw_info.tx_ol_flags; - - return 0; -} - -int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads, - u16 rx_offloads) -{ - union octep_pfvf_mbox_word cmd; - union octep_pfvf_mbox_word rsp; - int ret; - - cmd.u64 = 0; - cmd.s_offloads.opcode = OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS; - cmd.s_offloads.rx_ol_flags = rx_offloads; - cmd.s_offloads.tx_ol_flags = tx_offloads; - ret = octep_vf_mbox_send_cmd(oct, cmd, &rsp); - if (ret) { - dev_err(&oct->pdev->dev, "Set offloads via VF Mbox send failed\n"); - return ret; - } - if (rsp.s_link_state.type != OCTEP_PFVF_MBOX_TYPE_RSP_ACK) { - dev_err(&oct->pdev->dev, "Set offloads received NACK\n"); - return -EINVAL; - } - return 0; -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h deleted file mode 100644 index 9b5efad37eab..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_mbox.h +++ /dev/null @@ -1,166 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ -#ifndef _OCTEP_VF_MBOX_H_ -#define _OCTEP_VF_MBOX_H_ - -/* When a new command is implemented, VF Mbox version should be bumped. - */ -enum octep_pfvf_mbox_version { - OCTEP_PFVF_MBOX_VERSION_V0, - OCTEP_PFVF_MBOX_VERSION_V1, - OCTEP_PFVF_MBOX_VERSION_V2 -}; - -#define OCTEP_PFVF_MBOX_VERSION_CURRENT OCTEP_PFVF_MBOX_VERSION_V2 - -enum octep_pfvf_mbox_opcode { - OCTEP_PFVF_MBOX_CMD_VERSION, - OCTEP_PFVF_MBOX_CMD_SET_MTU, - OCTEP_PFVF_MBOX_CMD_SET_MAC_ADDR, - OCTEP_PFVF_MBOX_CMD_GET_MAC_ADDR, - OCTEP_PFVF_MBOX_CMD_GET_LINK_INFO, - OCTEP_PFVF_MBOX_CMD_GET_STATS, - OCTEP_PFVF_MBOX_CMD_SET_RX_STATE, - OCTEP_PFVF_MBOX_CMD_SET_LINK_STATUS, - OCTEP_PFVF_MBOX_CMD_GET_LINK_STATUS, - OCTEP_PFVF_MBOX_CMD_GET_MTU, - OCTEP_PFVF_MBOX_CMD_DEV_REMOVE, - OCTEP_PFVF_MBOX_CMD_GET_FW_INFO, - OCTEP_PFVF_MBOX_CMD_SET_OFFLOADS, - OCTEP_PFVF_MBOX_NOTIF_LINK_STATUS, - OCTEP_PFVF_MBOX_CMD_MAX, -}; - -enum octep_pfvf_mbox_word_type { - OCTEP_PFVF_MBOX_TYPE_CMD, - OCTEP_PFVF_MBOX_TYPE_RSP_ACK, - OCTEP_PFVF_MBOX_TYPE_RSP_NACK, -}; - -enum octep_pfvf_mbox_cmd_status { - OCTEP_PFVF_MBOX_CMD_STATUS_NOT_SETUP = 1, - OCTEP_PFVF_MBOX_CMD_STATUS_TIMEDOUT = 2, - OCTEP_PFVF_MBOX_CMD_STATUS_NACK = 3, - OCTEP_PFVF_MBOX_CMD_STATUS_BUSY = 4, - OCTEP_PFVF_MBOX_CMD_STATUS_ERR = 5 -}; - -enum octep_pfvf_link_status { - OCTEP_PFVF_LINK_STATUS_DOWN, - OCTEP_PFVF_LINK_STATUS_UP, -}; - -enum octep_pfvf_link_speed { - OCTEP_PFVF_LINK_SPEED_NONE, - OCTEP_PFVF_LINK_SPEED_1000, - OCTEP_PFVF_LINK_SPEED_10000, - OCTEP_PFVF_LINK_SPEED_25000, - OCTEP_PFVF_LINK_SPEED_40000, - OCTEP_PFVF_LINK_SPEED_50000, - OCTEP_PFVF_LINK_SPEED_100000, - OCTEP_PFVF_LINK_SPEED_LAST, -}; - -enum octep_pfvf_link_duplex { - OCTEP_PFVF_LINK_HALF_DUPLEX, - OCTEP_PFVF_LINK_FULL_DUPLEX, -}; - -enum octep_pfvf_link_autoneg { - OCTEP_PFVF_LINK_AUTONEG, - OCTEP_PFVF_LINK_FIXED, -}; - -#define OCTEP_PFVF_MBOX_TIMEOUT_WAIT_COUNT 8000 -#define OCTEP_PFVF_MBOX_TIMEOUT_WAIT_UDELAY 1000 -#define OCTEP_PFVF_MBOX_MAX_RETRIES 2 -#define OCTEP_PFVF_MBOX_VERSION 0 -#define OCTEP_PFVF_MBOX_MAX_DATA_SIZE 6 -#define OCTEP_PFVF_MBOX_MAX_DATA_BUF_SIZE 320 -#define OCTEP_PFVF_MBOX_MORE_FRAG_FLAG 1 - -union octep_pfvf_mbox_word { - u64 u64; - struct { - u64 opcode:8; - u64 type:2; - u64 rsvd:6; - u64 data:48; - } s; - struct { - u64 opcode:8; - u64 type:2; - u64 frag:1; - u64 rsvd:5; - u8 data[6]; - } s_data; - struct { - u64 opcode:8; - u64 type:2; - u64 rsvd:6; - u64 version:48; - } s_version; - struct { - u64 opcode:8; - u64 type:2; - u64 rsvd:6; - u8 mac_addr[6]; - } s_set_mac; - struct { - u64 opcode:8; - u64 type:2; - u64 rsvd:6; - u64 mtu:48; - } s_set_mtu; - struct { - u64 opcode:8; - u64 type:2; - u64 state:1; - u64 rsvd:53; - } s_link_state; - struct { - u64 opcode:8; - u64 type:2; - u64 status:1; - u64 rsvd:53; - } s_link_status; - struct { - u64 opcode:8; - u64 type:2; - u64 pkind:8; - u64 fsz:8; - u64 rx_ol_flags:16; - u64 tx_ol_flags:16; - u64 rsvd:6; - } s_fw_info; - struct { - u64 opcode:8; - u64 type:2; - u64 rsvd:22; - u64 rx_ol_flags:16; - u64 tx_ol_flags:16; - } s_offloads; -} __packed; - -int octep_vf_setup_mbox(struct octep_vf_device *oct); -void octep_vf_delete_mbox(struct octep_vf_device *oct); -int octep_vf_mbox_send_cmd(struct octep_vf_device *oct, union octep_pfvf_mbox_word cmd, - union octep_pfvf_mbox_word *rsp); -int octep_vf_mbox_bulk_read(struct octep_vf_device *oct, enum octep_pfvf_mbox_opcode opcode, - u8 *data, int *size); -int octep_vf_mbox_set_mtu(struct octep_vf_device *oct, int mtu); -int octep_vf_mbox_set_mac_addr(struct octep_vf_device *oct, char *mac_addr); -int octep_vf_mbox_get_mac_addr(struct octep_vf_device *oct, char *mac_addr); -int octep_vf_mbox_version_check(struct octep_vf_device *oct); -int octep_vf_mbox_set_rx_state(struct octep_vf_device *oct, bool state); -int octep_vf_mbox_set_link_status(struct octep_vf_device *oct, bool status); -int octep_vf_mbox_get_link_status(struct octep_vf_device *oct, u8 *oper_up); -int octep_vf_mbox_dev_remove(struct octep_vf_device *oct); -int octep_vf_mbox_get_fw_info(struct octep_vf_device *oct); -int octep_vf_mbox_set_offloads(struct octep_vf_device *oct, u16 tx_offloads, u16 rx_offloads); - -#endif diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h deleted file mode 100644 index 25e2a876ebba..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cn9k.h +++ /dev/null @@ -1,154 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ -#ifndef _OCTEP_VF_REGS_CN9K_H_ -#define _OCTEP_VF_REGS_CN9K_H_ - -/*############################ RST #########################*/ -#define CN93_VF_CONFIG_XPANSION_BAR 0x38 -#define CN93_VF_CONFIG_PCIE_CAP 0x70 -#define CN93_VF_CONFIG_PCIE_DEVCAP 0x74 -#define CN93_VF_CONFIG_PCIE_DEVCTL 0x78 -#define CN93_VF_CONFIG_PCIE_LINKCAP 0x7C -#define CN93_VF_CONFIG_PCIE_LINKCTL 0x80 -#define CN93_VF_CONFIG_PCIE_SLOTCAP 0x84 -#define CN93_VF_CONFIG_PCIE_SLOTCTL 0x88 - -#define CN93_VF_RING_OFFSET BIT_ULL(17) - -/*###################### RING IN REGISTERS #########################*/ -#define CN93_VF_SDP_R_IN_CONTROL_START 0x10000 -#define CN93_VF_SDP_R_IN_ENABLE_START 0x10010 -#define CN93_VF_SDP_R_IN_INSTR_BADDR_START 0x10020 -#define CN93_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030 -#define CN93_VF_SDP_R_IN_INSTR_DBELL_START 0x10040 -#define CN93_VF_SDP_R_IN_CNTS_START 0x10050 -#define CN93_VF_SDP_R_IN_INT_LEVELS_START 0x10060 -#define CN93_VF_SDP_R_IN_PKT_CNT_START 0x10080 -#define CN93_VF_SDP_R_IN_BYTE_CNT_START 0x10090 - -#define CN93_VF_SDP_R_IN_CONTROL(ring) \ - (CN93_VF_SDP_R_IN_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_ENABLE(ring) \ - (CN93_VF_SDP_R_IN_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_INSTR_BADDR(ring) \ - (CN93_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_INSTR_RSIZE(ring) \ - (CN93_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_INSTR_DBELL(ring) \ - (CN93_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_CNTS(ring) \ - (CN93_VF_SDP_R_IN_CNTS_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_INT_LEVELS(ring) \ - (CN93_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_PKT_CNT(ring) \ - (CN93_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_IN_BYTE_CNT(ring) \ - (CN93_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) - -/*------------------ R_IN Masks ----------------*/ - -/** Rings per Virtual Function **/ -#define CN93_VF_R_IN_CTL_RPVF_MASK (0xF) -#define CN93_VF_R_IN_CTL_RPVF_POS (48) - -/* Number of instructions to be read in one MAC read request. - * setting to Max value(4) - **/ -#define CN93_VF_R_IN_CTL_IDLE BIT_ULL(28) -#define CN93_VF_R_IN_CTL_RDSIZE (0x3ULL << 25) -#define CN93_VF_R_IN_CTL_IS_64B BIT_ULL(24) -#define CN93_VF_R_IN_CTL_D_NSR BIT_ULL(8) -#define CN93_VF_R_IN_CTL_D_ESR BIT_ULL(6) -#define CN93_VF_R_IN_CTL_D_ROR BIT_ULL(5) -#define CN93_VF_R_IN_CTL_NSR BIT_ULL(3) -#define CN93_VF_R_IN_CTL_ESR BIT_ULL(1) -#define CN93_VF_R_IN_CTL_ROR BIT_ULL(0) - -#define CN93_VF_R_IN_CTL_MASK (CN93_VF_R_IN_CTL_RDSIZE | CN93_VF_R_IN_CTL_IS_64B) - -/*###################### RING OUT REGISTERS #########################*/ -#define CN93_VF_SDP_R_OUT_CNTS_START 0x10100 -#define CN93_VF_SDP_R_OUT_INT_LEVELS_START 0x10110 -#define CN93_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120 -#define CN93_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130 -#define CN93_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140 -#define CN93_VF_SDP_R_OUT_CONTROL_START 0x10150 -#define CN93_VF_SDP_R_OUT_ENABLE_START 0x10160 -#define CN93_VF_SDP_R_OUT_PKT_CNT_START 0x10180 -#define CN93_VF_SDP_R_OUT_BYTE_CNT_START 0x10190 - -#define CN93_VF_SDP_R_OUT_CONTROL(ring) \ - (CN93_VF_SDP_R_OUT_CONTROL_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_ENABLE(ring) \ - (CN93_VF_SDP_R_OUT_ENABLE_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_SLIST_BADDR(ring) \ - (CN93_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_SLIST_RSIZE(ring) \ - (CN93_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_SLIST_DBELL(ring) \ - (CN93_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_CNTS(ring) \ - (CN93_VF_SDP_R_OUT_CNTS_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_INT_LEVELS(ring) \ - (CN93_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_PKT_CNT(ring) \ - (CN93_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_OUT_BYTE_CNT(ring) \ - (CN93_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CN93_VF_RING_OFFSET)) - -/*------------------ R_OUT Masks ----------------*/ -#define CN93_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) -#define CN93_VF_R_OUT_INT_LEVELS_TIMET (32) - -#define CN93_VF_R_OUT_CTL_IDLE BIT_ULL(40) -#define CN93_VF_R_OUT_CTL_ES_I BIT_ULL(34) -#define CN93_VF_R_OUT_CTL_NSR_I BIT_ULL(33) -#define CN93_VF_R_OUT_CTL_ROR_I BIT_ULL(32) -#define CN93_VF_R_OUT_CTL_ES_D BIT_ULL(30) -#define CN93_VF_R_OUT_CTL_NSR_D BIT_ULL(29) -#define CN93_VF_R_OUT_CTL_ROR_D BIT_ULL(28) -#define CN93_VF_R_OUT_CTL_ES_P BIT_ULL(26) -#define CN93_VF_R_OUT_CTL_NSR_P BIT_ULL(25) -#define CN93_VF_R_OUT_CTL_ROR_P BIT_ULL(24) -#define CN93_VF_R_OUT_CTL_IMODE BIT_ULL(23) - -/* ##################### Mail Box Registers ########################## */ -/* SDP PF to VF Mailbox Data Register */ -#define CN93_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210 -/* SDP Packet PF to VF Mailbox Interrupt Register */ -#define CN93_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220 -/* SDP VF to PF Mailbox Data Register */ -#define CN93_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230 - -#define CN93_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1) -#define CN93_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0) - -#define CN93_VF_SDP_R_MBOX_PF_VF_DATA(ring) \ - (CN93_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_MBOX_PF_VF_INT(ring) \ - (CN93_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CN93_VF_RING_OFFSET)) - -#define CN93_VF_SDP_R_MBOX_VF_PF_DATA(ring) \ - (CN93_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CN93_VF_RING_OFFSET)) -#endif /* _OCTEP_VF_REGS_CN9K_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h deleted file mode 100644 index 2e156745ef64..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_regs_cnxk.h +++ /dev/null @@ -1,162 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ -#ifndef _OCTEP_VF_REGS_CNXK_H_ -#define _OCTEP_VF_REGS_CNXK_H_ - -/*############################ RST #########################*/ -#define CNXK_VF_CONFIG_XPANSION_BAR 0x38 -#define CNXK_VF_CONFIG_PCIE_CAP 0x70 -#define CNXK_VF_CONFIG_PCIE_DEVCAP 0x74 -#define CNXK_VF_CONFIG_PCIE_DEVCTL 0x78 -#define CNXK_VF_CONFIG_PCIE_LINKCAP 0x7C -#define CNXK_VF_CONFIG_PCIE_LINKCTL 0x80 -#define CNXK_VF_CONFIG_PCIE_SLOTCAP 0x84 -#define CNXK_VF_CONFIG_PCIE_SLOTCTL 0x88 - -#define CNXK_VF_RING_OFFSET (0x1ULL << 17) - -/*###################### RING IN REGISTERS #########################*/ -#define CNXK_VF_SDP_R_IN_CONTROL_START 0x10000 -#define CNXK_VF_SDP_R_IN_ENABLE_START 0x10010 -#define CNXK_VF_SDP_R_IN_INSTR_BADDR_START 0x10020 -#define CNXK_VF_SDP_R_IN_INSTR_RSIZE_START 0x10030 -#define CNXK_VF_SDP_R_IN_INSTR_DBELL_START 0x10040 -#define CNXK_VF_SDP_R_IN_CNTS_START 0x10050 -#define CNXK_VF_SDP_R_IN_INT_LEVELS_START 0x10060 -#define CNXK_VF_SDP_R_IN_PKT_CNT_START 0x10080 -#define CNXK_VF_SDP_R_IN_BYTE_CNT_START 0x10090 -#define CNXK_VF_SDP_R_ERR_TYPE_START 0x10400 - -#define CNXK_VF_SDP_R_ERR_TYPE(ring) \ - (CNXK_VF_SDP_R_ERR_TYPE_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_CONTROL(ring) \ - (CNXK_VF_SDP_R_IN_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_ENABLE(ring) \ - (CNXK_VF_SDP_R_IN_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_INSTR_BADDR(ring) \ - (CNXK_VF_SDP_R_IN_INSTR_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_INSTR_RSIZE(ring) \ - (CNXK_VF_SDP_R_IN_INSTR_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_INSTR_DBELL(ring) \ - (CNXK_VF_SDP_R_IN_INSTR_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_CNTS(ring) \ - (CNXK_VF_SDP_R_IN_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_INT_LEVELS(ring) \ - (CNXK_VF_SDP_R_IN_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_PKT_CNT(ring) \ - (CNXK_VF_SDP_R_IN_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_IN_BYTE_CNT(ring) \ - (CNXK_VF_SDP_R_IN_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) - -/*------------------ R_IN Masks ----------------*/ - -/** Rings per Virtual Function **/ -#define CNXK_VF_R_IN_CTL_RPVF_MASK (0xF) -#define CNXK_VF_R_IN_CTL_RPVF_POS (48) - -/* Number of instructions to be read in one MAC read request. - * setting to Max value(4) - **/ -#define CNXK_VF_R_IN_CTL_IDLE (0x1ULL << 28) -#define CNXK_VF_R_IN_CTL_RDSIZE (0x3ULL << 25) -#define CNXK_VF_R_IN_CTL_IS_64B (0x1ULL << 24) -#define CNXK_VF_R_IN_CTL_D_NSR (0x1ULL << 8) -#define CNXK_VF_R_IN_CTL_D_ESR (0x1ULL << 6) -#define CNXK_VF_R_IN_CTL_D_ROR (0x1ULL << 5) -#define CNXK_VF_R_IN_CTL_NSR (0x1ULL << 3) -#define CNXK_VF_R_IN_CTL_ESR (0x1ULL << 1) -#define CNXK_VF_R_IN_CTL_ROR (0x1ULL << 0) - -#define CNXK_VF_R_IN_CTL_MASK (CNXK_VF_R_IN_CTL_RDSIZE | CNXK_VF_R_IN_CTL_IS_64B) - -/*###################### RING OUT REGISTERS #########################*/ -#define CNXK_VF_SDP_R_OUT_CNTS_START 0x10100 -#define CNXK_VF_SDP_R_OUT_INT_LEVELS_START 0x10110 -#define CNXK_VF_SDP_R_OUT_SLIST_BADDR_START 0x10120 -#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START 0x10130 -#define CNXK_VF_SDP_R_OUT_SLIST_DBELL_START 0x10140 -#define CNXK_VF_SDP_R_OUT_CONTROL_START 0x10150 -#define CNXK_VF_SDP_R_OUT_WMARK_START 0x10160 -#define CNXK_VF_SDP_R_OUT_ENABLE_START 0x10170 -#define CNXK_VF_SDP_R_OUT_PKT_CNT_START 0x10180 -#define CNXK_VF_SDP_R_OUT_BYTE_CNT_START 0x10190 - -#define CNXK_VF_SDP_R_OUT_CONTROL(ring) \ - (CNXK_VF_SDP_R_OUT_CONTROL_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_ENABLE(ring) \ - (CNXK_VF_SDP_R_OUT_ENABLE_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_SLIST_BADDR(ring) \ - (CNXK_VF_SDP_R_OUT_SLIST_BADDR_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_SLIST_RSIZE(ring) \ - (CNXK_VF_SDP_R_OUT_SLIST_RSIZE_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_SLIST_DBELL(ring) \ - (CNXK_VF_SDP_R_OUT_SLIST_DBELL_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_WMARK(ring) \ - (CNXK_VF_SDP_R_OUT_WMARK_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_CNTS(ring) \ - (CNXK_VF_SDP_R_OUT_CNTS_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_INT_LEVELS(ring) \ - (CNXK_VF_SDP_R_OUT_INT_LEVELS_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_PKT_CNT(ring) \ - (CNXK_VF_SDP_R_OUT_PKT_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_OUT_BYTE_CNT(ring) \ - (CNXK_VF_SDP_R_OUT_BYTE_CNT_START + ((ring) * CNXK_VF_RING_OFFSET)) - -/*------------------ R_OUT Masks ----------------*/ -#define CNXK_VF_R_OUT_INT_LEVELS_BMODE BIT_ULL(63) -#define CNXK_VF_R_OUT_INT_LEVELS_TIMET (32) - -#define CNXK_VF_R_OUT_CTL_IDLE BIT_ULL(40) -#define CNXK_VF_R_OUT_CTL_ES_I BIT_ULL(34) -#define CNXK_VF_R_OUT_CTL_NSR_I BIT_ULL(33) -#define CNXK_VF_R_OUT_CTL_ROR_I BIT_ULL(32) -#define CNXK_VF_R_OUT_CTL_ES_D BIT_ULL(30) -#define CNXK_VF_R_OUT_CTL_NSR_D BIT_ULL(29) -#define CNXK_VF_R_OUT_CTL_ROR_D BIT_ULL(28) -#define CNXK_VF_R_OUT_CTL_ES_P BIT_ULL(26) -#define CNXK_VF_R_OUT_CTL_NSR_P BIT_ULL(25) -#define CNXK_VF_R_OUT_CTL_ROR_P BIT_ULL(24) -#define CNXK_VF_R_OUT_CTL_IMODE BIT_ULL(23) - -/* ##################### Mail Box Registers ########################## */ -/* SDP PF to VF Mailbox Data Register */ -#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START 0x10210 -/* SDP Packet PF to VF Mailbox Interrupt Register */ -#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_START 0x10220 -/* SDP VF to PF Mailbox Data Register */ -#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START 0x10230 - -#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_ENAB BIT_ULL(1) -#define CNXK_VF_SDP_R_MBOX_PF_VF_INT_STATUS BIT_ULL(0) - -#define CNXK_VF_SDP_R_MBOX_PF_VF_DATA(ring) \ - (CNXK_VF_SDP_R_MBOX_PF_VF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_MBOX_PF_VF_INT(ring) \ - (CNXK_VF_SDP_R_MBOX_PF_VF_INT_START + ((ring) * CNXK_VF_RING_OFFSET)) - -#define CNXK_VF_SDP_R_MBOX_VF_PF_DATA(ring) \ - (CNXK_VF_SDP_R_MBOX_VF_PF_DATA_START + ((ring) * CNXK_VF_RING_OFFSET)) -#endif /* _OCTEP_VF_REGS_CNXK_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c deleted file mode 100644 index 2789ec6669e0..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.c +++ /dev/null @@ -1,511 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" - -static void octep_vf_oq_reset_indices(struct octep_vf_oq *oq) -{ - oq->host_read_idx = 0; - oq->host_refill_idx = 0; - oq->refill_count = 0; - oq->last_pkt_count = 0; - oq->pkts_pending = 0; -} - -/** - * octep_vf_oq_fill_ring_buffers() - fill initial receive buffers for Rx ring. - * - * @oq: Octeon Rx queue data structure. - * - * Return: 0, if successfully filled receive buffers for all descriptors. - * -1, if failed to allocate a buffer or failed to map for DMA. - */ -static int octep_vf_oq_fill_ring_buffers(struct octep_vf_oq *oq) -{ - struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring; - struct page *page; - u32 i; - - for (i = 0; i < oq->max_count; i++) { - page = dev_alloc_page(); - if (unlikely(!page)) { - dev_err(oq->dev, "Rx buffer alloc failed\n"); - goto rx_buf_alloc_err; - } - desc_ring[i].buffer_ptr = dma_map_page(oq->dev, page, 0, - PAGE_SIZE, - DMA_FROM_DEVICE); - if (dma_mapping_error(oq->dev, desc_ring[i].buffer_ptr)) { - dev_err(oq->dev, - "OQ-%d buffer alloc: DMA mapping error!\n", - oq->q_no); - put_page(page); - goto dma_map_err; - } - oq->buff_info[i].page = page; - } - - return 0; - -dma_map_err: -rx_buf_alloc_err: - while (i) { - i--; - dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, PAGE_SIZE, DMA_FROM_DEVICE); - put_page(oq->buff_info[i].page); - oq->buff_info[i].page = NULL; - } - - return -1; -} - -/** - * octep_vf_oq_refill() - refill buffers for used Rx ring descriptors. - * - * @oct: Octeon device private data structure. - * @oq: Octeon Rx queue data structure. - * - * Return: number of descriptors successfully refilled with receive buffers. - */ -static int octep_vf_oq_refill(struct octep_vf_device *oct, struct octep_vf_oq *oq) -{ - struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring; - struct page *page; - u32 refill_idx, i; - - refill_idx = oq->host_refill_idx; - for (i = 0; i < oq->refill_count; i++) { - page = dev_alloc_page(); - if (unlikely(!page)) { - dev_err(oq->dev, "refill: rx buffer alloc failed\n"); - oq->stats.alloc_failures++; - break; - } - - desc_ring[refill_idx].buffer_ptr = dma_map_page(oq->dev, page, 0, - PAGE_SIZE, DMA_FROM_DEVICE); - if (dma_mapping_error(oq->dev, desc_ring[refill_idx].buffer_ptr)) { - dev_err(oq->dev, - "OQ-%d buffer refill: DMA mapping error!\n", - oq->q_no); - put_page(page); - oq->stats.alloc_failures++; - break; - } - oq->buff_info[refill_idx].page = page; - refill_idx++; - if (refill_idx == oq->max_count) - refill_idx = 0; - } - oq->host_refill_idx = refill_idx; - oq->refill_count -= i; - - return i; -} - -/** - * octep_vf_setup_oq() - Setup a Rx queue. - * - * @oct: Octeon device private data structure. - * @q_no: Rx queue number to be setup. - * - * Allocate resources for a Rx queue. - */ -static int octep_vf_setup_oq(struct octep_vf_device *oct, int q_no) -{ - struct octep_vf_oq *oq; - u32 desc_ring_size; - - oq = vzalloc(sizeof(*oq)); - if (!oq) - goto create_oq_fail; - oct->oq[q_no] = oq; - - oq->octep_vf_dev = oct; - oq->netdev = oct->netdev; - oq->dev = &oct->pdev->dev; - oq->q_no = q_no; - oq->max_count = CFG_GET_OQ_NUM_DESC(oct->conf); - oq->ring_size_mask = oq->max_count - 1; - oq->buffer_size = CFG_GET_OQ_BUF_SIZE(oct->conf); - oq->max_single_buffer_size = oq->buffer_size - OCTEP_VF_OQ_RESP_HW_SIZE; - - /* When the hardware/firmware supports additional capabilities, - * additional header is filled-in by Octeon after length field in - * Rx packets. this header contains additional packet information. - */ - if (oct->fw_info.rx_ol_flags) - oq->max_single_buffer_size -= OCTEP_VF_OQ_RESP_HW_EXT_SIZE; - - oq->refill_threshold = CFG_GET_OQ_REFILL_THRESHOLD(oct->conf); - - desc_ring_size = oq->max_count * OCTEP_VF_OQ_DESC_SIZE; - oq->desc_ring = dma_alloc_coherent(oq->dev, desc_ring_size, - &oq->desc_ring_dma, GFP_KERNEL); - - if (unlikely(!oq->desc_ring)) { - dev_err(oq->dev, - "Failed to allocate DMA memory for OQ-%d !!\n", q_no); - goto desc_dma_alloc_err; - } - - oq->buff_info = vzalloc(oq->max_count * OCTEP_VF_OQ_RECVBUF_SIZE); - - if (unlikely(!oq->buff_info)) { - dev_err(&oct->pdev->dev, - "Failed to allocate buffer info for OQ-%d\n", q_no); - goto buf_list_err; - } - - if (octep_vf_oq_fill_ring_buffers(oq)) - goto oq_fill_buff_err; - - octep_vf_oq_reset_indices(oq); - oct->hw_ops.setup_oq_regs(oct, q_no); - oct->num_oqs++; - - return 0; - -oq_fill_buff_err: - vfree(oq->buff_info); - oq->buff_info = NULL; -buf_list_err: - dma_free_coherent(oq->dev, desc_ring_size, - oq->desc_ring, oq->desc_ring_dma); - oq->desc_ring = NULL; -desc_dma_alloc_err: - vfree(oq); - oct->oq[q_no] = NULL; -create_oq_fail: - return -1; -} - -/** - * octep_vf_oq_free_ring_buffers() - Free ring buffers. - * - * @oq: Octeon Rx queue data structure. - * - * Free receive buffers in unused Rx queue descriptors. - */ -static void octep_vf_oq_free_ring_buffers(struct octep_vf_oq *oq) -{ - struct octep_vf_oq_desc_hw *desc_ring = oq->desc_ring; - int i; - - if (!oq->desc_ring || !oq->buff_info) - return; - - for (i = 0; i < oq->max_count; i++) { - if (oq->buff_info[i].page) { - dma_unmap_page(oq->dev, desc_ring[i].buffer_ptr, - PAGE_SIZE, DMA_FROM_DEVICE); - put_page(oq->buff_info[i].page); - oq->buff_info[i].page = NULL; - desc_ring[i].buffer_ptr = 0; - } - } - octep_vf_oq_reset_indices(oq); -} - -/** - * octep_vf_free_oq() - Free Rx queue resources. - * - * @oq: Octeon Rx queue data structure. - * - * Free all resources of a Rx queue. - */ -static int octep_vf_free_oq(struct octep_vf_oq *oq) -{ - struct octep_vf_device *oct = oq->octep_vf_dev; - int q_no = oq->q_no; - - octep_vf_oq_free_ring_buffers(oq); - - if (oq->buff_info) - vfree(oq->buff_info); - - if (oq->desc_ring) - dma_free_coherent(oq->dev, - oq->max_count * OCTEP_VF_OQ_DESC_SIZE, - oq->desc_ring, oq->desc_ring_dma); - - vfree(oq); - oct->oq[q_no] = NULL; - oct->num_oqs--; - return 0; -} - -/** - * octep_vf_setup_oqs() - setup resources for all Rx queues. - * - * @oct: Octeon device private data structure. - */ -int octep_vf_setup_oqs(struct octep_vf_device *oct) -{ - int i, retval = 0; - - oct->num_oqs = 0; - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { - retval = octep_vf_setup_oq(oct, i); - if (retval) { - dev_err(&oct->pdev->dev, - "Failed to setup OQ(RxQ)-%d.\n", i); - goto oq_setup_err; - } - dev_dbg(&oct->pdev->dev, "Successfully setup OQ(RxQ)-%d.\n", i); - } - - return 0; - -oq_setup_err: - while (i) { - i--; - octep_vf_free_oq(oct->oq[i]); - } - return -1; -} - -/** - * octep_vf_oq_dbell_init() - Initialize Rx queue doorbell. - * - * @oct: Octeon device private data structure. - * - * Write number of descriptors to Rx queue doorbell register. - */ -void octep_vf_oq_dbell_init(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_oqs; i++) - writel(oct->oq[i]->max_count, oct->oq[i]->pkts_credit_reg); -} - -/** - * octep_vf_free_oqs() - Free resources of all Rx queues. - * - * @oct: Octeon device private data structure. - */ -void octep_vf_free_oqs(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { - if (!oct->oq[i]) - continue; - octep_vf_free_oq(oct->oq[i]); - dev_dbg(&oct->pdev->dev, - "Successfully freed OQ(RxQ)-%d.\n", i); - } -} - -/** - * octep_vf_oq_check_hw_for_pkts() - Check for new Rx packets. - * - * @oct: Octeon device private data structure. - * @oq: Octeon Rx queue data structure. - * - * Return: packets received after previous check. - */ -static int octep_vf_oq_check_hw_for_pkts(struct octep_vf_device *oct, - struct octep_vf_oq *oq) -{ - u32 pkt_count, new_pkts; - - pkt_count = readl(oq->pkts_sent_reg); - new_pkts = pkt_count - oq->last_pkt_count; - - /* Clear the hardware packets counter register if the rx queue is - * being processed continuously with-in a single interrupt and - * reached half its max value. - * this counter is not cleared every time read, to save write cycles. - */ - if (unlikely(pkt_count > 0xF0000000U)) { - writel(pkt_count, oq->pkts_sent_reg); - pkt_count = readl(oq->pkts_sent_reg); - new_pkts += pkt_count; - } - oq->last_pkt_count = pkt_count; - oq->pkts_pending += new_pkts; - return new_pkts; -} - -/** - * __octep_vf_oq_process_rx() - Process hardware Rx queue and push to stack. - * - * @oct: Octeon device private data structure. - * @oq: Octeon Rx queue data structure. - * @pkts_to_process: number of packets to be processed. - * - * Process the new packets in Rx queue. - * Packets larger than single Rx buffer arrive in consecutive descriptors. - * But, count returned by the API only accounts full packets, not fragments. - * - * Return: number of packets processed and pushed to stack. - */ -static int __octep_vf_oq_process_rx(struct octep_vf_device *oct, - struct octep_vf_oq *oq, u16 pkts_to_process) -{ - struct octep_vf_oq_resp_hw_ext *resp_hw_ext = NULL; - netdev_features_t feat = oq->netdev->features; - struct octep_vf_rx_buffer *buff_info; - struct octep_vf_oq_resp_hw *resp_hw; - u32 pkt, rx_bytes, desc_used; - u16 data_offset, rx_ol_flags; - struct sk_buff *skb; - u32 read_idx; - - read_idx = oq->host_read_idx; - rx_bytes = 0; - desc_used = 0; - for (pkt = 0; pkt < pkts_to_process; pkt++) { - buff_info = (struct octep_vf_rx_buffer *)&oq->buff_info[read_idx]; - dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, - PAGE_SIZE, DMA_FROM_DEVICE); - resp_hw = page_address(buff_info->page); - buff_info->page = NULL; - - /* Swap the length field that is in Big-Endian to CPU */ - buff_info->len = be64_to_cpu(resp_hw->length); - if (oct->fw_info.rx_ol_flags) { - /* Extended response header is immediately after - * response header (resp_hw) - */ - resp_hw_ext = (struct octep_vf_oq_resp_hw_ext *) - (resp_hw + 1); - buff_info->len -= OCTEP_VF_OQ_RESP_HW_EXT_SIZE; - /* Packet Data is immediately after - * extended response header. - */ - data_offset = OCTEP_VF_OQ_RESP_HW_SIZE + - OCTEP_VF_OQ_RESP_HW_EXT_SIZE; - rx_ol_flags = resp_hw_ext->rx_ol_flags; - } else { - /* Data is immediately after - * Hardware Rx response header. - */ - data_offset = OCTEP_VF_OQ_RESP_HW_SIZE; - rx_ol_flags = 0; - } - rx_bytes += buff_info->len; - - if (buff_info->len <= oq->max_single_buffer_size) { - skb = build_skb((void *)resp_hw, PAGE_SIZE); - skb_reserve(skb, data_offset); - skb_put(skb, buff_info->len); - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; - } else { - struct skb_shared_info *shinfo; - u16 data_len; - - skb = build_skb((void *)resp_hw, PAGE_SIZE); - skb_reserve(skb, data_offset); - /* Head fragment includes response header(s); - * subsequent fragments contains only data. - */ - skb_put(skb, oq->max_single_buffer_size); - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; - - shinfo = skb_shinfo(skb); - data_len = buff_info->len - oq->max_single_buffer_size; - while (data_len) { - dma_unmap_page(oq->dev, oq->desc_ring[read_idx].buffer_ptr, - PAGE_SIZE, DMA_FROM_DEVICE); - buff_info = (struct octep_vf_rx_buffer *) - &oq->buff_info[read_idx]; - if (data_len < oq->buffer_size) { - buff_info->len = data_len; - data_len = 0; - } else { - buff_info->len = oq->buffer_size; - data_len -= oq->buffer_size; - } - - skb_add_rx_frag(skb, shinfo->nr_frags, - buff_info->page, 0, - buff_info->len, - buff_info->len); - buff_info->page = NULL; - read_idx++; - desc_used++; - if (read_idx == oq->max_count) - read_idx = 0; - } - } - - skb->dev = oq->netdev; - skb->protocol = eth_type_trans(skb, skb->dev); - if (feat & NETIF_F_RXCSUM && - OCTEP_VF_RX_CSUM_VERIFIED(rx_ol_flags)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - else - skb->ip_summed = CHECKSUM_NONE; - napi_gro_receive(oq->napi, skb); - } - - oq->host_read_idx = read_idx; - oq->refill_count += desc_used; - oq->stats.packets += pkt; - oq->stats.bytes += rx_bytes; - - return pkt; -} - -/** - * octep_vf_oq_process_rx() - Process Rx queue. - * - * @oq: Octeon Rx queue data structure. - * @budget: max number of packets can be processed in one invocation. - * - * Check for newly received packets and process them. - * Keeps checking for new packets until budget is used or no new packets seen. - * - * Return: number of packets processed. - */ -int octep_vf_oq_process_rx(struct octep_vf_oq *oq, int budget) -{ - u32 pkts_available, pkts_processed, total_pkts_processed; - struct octep_vf_device *oct = oq->octep_vf_dev; - - pkts_available = 0; - pkts_processed = 0; - total_pkts_processed = 0; - while (total_pkts_processed < budget) { - /* update pending count only when current one exhausted */ - if (oq->pkts_pending == 0) - octep_vf_oq_check_hw_for_pkts(oct, oq); - pkts_available = min(budget - total_pkts_processed, - oq->pkts_pending); - if (!pkts_available) - break; - - pkts_processed = __octep_vf_oq_process_rx(oct, oq, - pkts_available); - oq->pkts_pending -= pkts_processed; - total_pkts_processed += pkts_processed; - } - - if (oq->refill_count >= oq->refill_threshold) { - u32 desc_refilled = octep_vf_oq_refill(oct, oq); - - /* flush pending writes before updating credits */ - smp_wmb(); - writel(desc_refilled, oq->pkts_credit_reg); - } - - return total_pkts_processed; -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h deleted file mode 100644 index fe46838b5200..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_rx.h +++ /dev/null @@ -1,224 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#ifndef _OCTEP_VF_RX_H_ -#define _OCTEP_VF_RX_H_ - -/* struct octep_vf_oq_desc_hw - Octeon Hardware OQ descriptor format. - * - * The descriptor ring is made of descriptors which have 2 64-bit values: - * - * @buffer_ptr: DMA address of the skb->data - * @info_ptr: DMA address of host memory, used to update pkt count by hw. - * This is currently unused to save pci writes. - */ -struct octep_vf_oq_desc_hw { - dma_addr_t buffer_ptr; - u64 info_ptr; -}; - -static_assert(sizeof(struct octep_vf_oq_desc_hw) == 16); - -#define OCTEP_VF_OQ_DESC_SIZE (sizeof(struct octep_vf_oq_desc_hw)) - -/* Rx offload flags */ -#define OCTEP_VF_RX_OFFLOAD_VLAN_STRIP BIT(0) -#define OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM BIT(1) -#define OCTEP_VF_RX_OFFLOAD_UDP_CKSUM BIT(2) -#define OCTEP_VF_RX_OFFLOAD_TCP_CKSUM BIT(3) - -#define OCTEP_VF_RX_OFFLOAD_CKSUM (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \ - OCTEP_VF_RX_OFFLOAD_UDP_CKSUM | \ - OCTEP_VF_RX_OFFLOAD_TCP_CKSUM) - -#define OCTEP_VF_RX_IP_CSUM(flags) ((flags) & \ - (OCTEP_VF_RX_OFFLOAD_IPV4_CKSUM | \ - OCTEP_VF_RX_OFFLOAD_TCP_CKSUM | \ - OCTEP_VF_RX_OFFLOAD_UDP_CKSUM)) - -/* bit 0 is vlan strip */ -#define OCTEP_VF_RX_CSUM_IP_VERIFIED BIT(1) -#define OCTEP_VF_RX_CSUM_L4_VERIFIED BIT(2) - -#define OCTEP_VF_RX_CSUM_VERIFIED(flags) ((flags) & \ - (OCTEP_VF_RX_CSUM_L4_VERIFIED | \ - OCTEP_VF_RX_CSUM_IP_VERIFIED)) - -/* Extended Response Header in packet data received from Hardware. - * Includes metadata like checksum status. - * this is valid only if hardware/firmware published support for this. - * This is at offset 0 of packet data (skb->data). - */ -struct octep_vf_oq_resp_hw_ext { - /* Reserved. */ - u64 rsvd:48; - - /* rx offload flags */ - u16 rx_ol_flags; -}; - -static_assert(sizeof(struct octep_vf_oq_resp_hw_ext) == 8); - -#define OCTEP_VF_OQ_RESP_HW_EXT_SIZE (sizeof(struct octep_vf_oq_resp_hw_ext)) - -/* Length of Rx packet DMA'ed by Octeon to Host. - * this is in bigendian; so need to be converted to cpu endian. - * Octeon writes this at the beginning of Rx buffer (skb->data). - */ -struct octep_vf_oq_resp_hw { - /* The Length of the packet. */ - __be64 length; -}; - -static_assert(sizeof(struct octep_vf_oq_resp_hw) == 8); - -#define OCTEP_VF_OQ_RESP_HW_SIZE (sizeof(struct octep_vf_oq_resp_hw)) - -/* Pointer to data buffer. - * Driver keeps a pointer to the data buffer that it made available to - * the Octeon device. Since the descriptor ring keeps physical (bus) - * addresses, this field is required for the driver to keep track of - * the virtual address pointers. The fields are operated by - * OS-dependent routines. - */ -struct octep_vf_rx_buffer { - struct page *page; - - /* length from rx hardware descriptor after converting to cpu endian */ - u64 len; -}; - -#define OCTEP_VF_OQ_RECVBUF_SIZE (sizeof(struct octep_vf_rx_buffer)) - -/* Output Queue statistics. Each output queue has four stats fields. */ -struct octep_vf_oq_stats { - /* Number of packets received from the Device. */ - u64 packets; - - /* Number of bytes received from the Device. */ - u64 bytes; - - /* Number of times failed to allocate buffers. */ - u64 alloc_failures; -}; - -#define OCTEP_VF_OQ_STATS_SIZE (sizeof(struct octep_vf_oq_stats)) - -/* Hardware interface Rx statistics */ -struct octep_vf_iface_rx_stats { - /* Received packets */ - u64 pkts; - - /* Octets of received packets */ - u64 octets; - - /* Received PAUSE and Control packets */ - u64 pause_pkts; - - /* Received PAUSE and Control octets */ - u64 pause_octets; - - /* Filtered DMAC0 packets */ - u64 dmac0_pkts; - - /* Filtered DMAC0 octets */ - u64 dmac0_octets; - - /* Packets dropped due to RX FIFO full */ - u64 dropped_pkts_fifo_full; - - /* Octets dropped due to RX FIFO full */ - u64 dropped_octets_fifo_full; - - /* Error packets */ - u64 err_pkts; - - /* Filtered DMAC1 packets */ - u64 dmac1_pkts; - - /* Filtered DMAC1 octets */ - u64 dmac1_octets; - - /* NCSI-bound packets dropped */ - u64 ncsi_dropped_pkts; - - /* NCSI-bound octets dropped */ - u64 ncsi_dropped_octets; - - /* Multicast packets received. */ - u64 mcast_pkts; - - /* Broadcast packets received. */ - u64 bcast_pkts; - -}; - -/* The Descriptor Ring Output Queue structure. - * This structure has all the information required to implement a - * Octeon OQ. - */ -struct octep_vf_oq { - u32 q_no; - - struct octep_vf_device *octep_vf_dev; - struct net_device *netdev; - struct device *dev; - - struct napi_struct *napi; - - /* The receive buffer list. This list has the virtual addresses - * of the buffers. - */ - struct octep_vf_rx_buffer *buff_info; - - /* Pointer to the mapped packet credit register. - * Host writes number of info/buffer ptrs available to this register - */ - u8 __iomem *pkts_credit_reg; - - /* Pointer to the mapped packet sent register. - * Octeon writes the number of packets DMA'ed to host memory - * in this register. - */ - u8 __iomem *pkts_sent_reg; - - /* Statistics for this OQ. */ - struct octep_vf_oq_stats stats; - - /* Packets pending to be processed */ - u32 pkts_pending; - u32 last_pkt_count; - - /* Index in the ring where the driver should read the next packet */ - u32 host_read_idx; - - /* Number of descriptors in this ring. */ - u32 max_count; - u32 ring_size_mask; - - /* The number of descriptors pending refill. */ - u32 refill_count; - - /* Index in the ring where the driver will refill the - * descriptor's buffer - */ - u32 host_refill_idx; - u32 refill_threshold; - - /* The size of each buffer pointed by the buffer pointer. */ - u32 buffer_size; - u32 max_single_buffer_size; - - /* The 8B aligned descriptor ring starts at this address. */ - struct octep_vf_oq_desc_hw *desc_ring; - - /* DMA mapped address of the OQ descriptor ring. */ - dma_addr_t desc_ring_dma; -}; - -#define OCTEP_VF_OQ_SIZE (sizeof(struct octep_vf_oq)) -#endif /* _OCTEP_VF_RX_H_ */ diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c deleted file mode 100644 index bcd8702832a0..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.c +++ /dev/null @@ -1,331 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#include -#include -#include - -#include "octep_vf_config.h" -#include "octep_vf_main.h" - -/* Reset various index of Tx queue data structure. */ -static void octep_vf_iq_reset_indices(struct octep_vf_iq *iq) -{ - iq->fill_cnt = 0; - iq->host_write_index = 0; - iq->octep_vf_read_index = 0; - iq->flush_index = 0; - iq->pkts_processed = 0; - iq->pkt_in_done = 0; -} - -/** - * octep_vf_iq_process_completions() - Process Tx queue completions. - * - * @iq: Octeon Tx queue data structure. - * @budget: max number of completions to be processed in one invocation. - */ -int octep_vf_iq_process_completions(struct octep_vf_iq *iq, u16 budget) -{ - u32 compl_pkts, compl_bytes, compl_sg; - struct octep_vf_device *oct = iq->octep_vf_dev; - struct octep_vf_tx_buffer *tx_buffer; - struct skb_shared_info *shinfo; - u32 fi = iq->flush_index; - struct sk_buff *skb; - u8 frags, i; - - compl_pkts = 0; - compl_sg = 0; - compl_bytes = 0; - iq->octep_vf_read_index = oct->hw_ops.update_iq_read_idx(iq); - - while (likely(budget && (fi != iq->octep_vf_read_index))) { - tx_buffer = iq->buff_info + fi; - skb = tx_buffer->skb; - - fi++; - if (unlikely(fi == iq->max_count)) - fi = 0; - compl_bytes += skb->len; - compl_pkts++; - budget--; - - if (!tx_buffer->gather) { - dma_unmap_single(iq->dev, tx_buffer->dma, - tx_buffer->skb->len, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); - continue; - } - - /* Scatter/Gather */ - shinfo = skb_shinfo(skb); - frags = shinfo->nr_frags; - compl_sg++; - - dma_unmap_single(iq->dev, tx_buffer->sglist[0].dma_ptr[0], - tx_buffer->sglist[0].len[3], DMA_TO_DEVICE); - - i = 1; /* entry 0 is main skb, unmapped above */ - while (frags--) { - dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[3 - (i & 3)], DMA_TO_DEVICE); - i++; - } - - dev_kfree_skb_any(skb); - } - - iq->pkts_processed += compl_pkts; - iq->stats.instr_completed += compl_pkts; - iq->stats.bytes_sent += compl_bytes; - iq->stats.sgentry_sent += compl_sg; - iq->flush_index = fi; - - netdev_tx_completed_queue(iq->netdev_q, compl_pkts, compl_bytes); - - if (unlikely(__netif_subqueue_stopped(iq->netdev, iq->q_no)) && - (IQ_INSTR_SPACE(iq) > - OCTEP_VF_WAKE_QUEUE_THRESHOLD)) - netif_wake_subqueue(iq->netdev, iq->q_no); - return !budget; -} - -/** - * octep_vf_iq_free_pending() - Free Tx buffers for pending completions. - * - * @iq: Octeon Tx queue data structure. - */ -static void octep_vf_iq_free_pending(struct octep_vf_iq *iq) -{ - struct octep_vf_tx_buffer *tx_buffer; - struct skb_shared_info *shinfo; - u32 fi = iq->flush_index; - struct sk_buff *skb; - u8 frags, i; - - while (fi != iq->host_write_index) { - tx_buffer = iq->buff_info + fi; - skb = tx_buffer->skb; - - fi++; - if (unlikely(fi == iq->max_count)) - fi = 0; - - if (!tx_buffer->gather) { - dma_unmap_single(iq->dev, tx_buffer->dma, - tx_buffer->skb->len, DMA_TO_DEVICE); - dev_kfree_skb_any(skb); - continue; - } - - /* Scatter/Gather */ - shinfo = skb_shinfo(skb); - frags = shinfo->nr_frags; - - dma_unmap_single(iq->dev, - tx_buffer->sglist[0].dma_ptr[0], - tx_buffer->sglist[0].len[0], - DMA_TO_DEVICE); - - i = 1; /* entry 0 is main skb, unmapped above */ - while (frags--) { - dma_unmap_page(iq->dev, tx_buffer->sglist[i >> 2].dma_ptr[i & 3], - tx_buffer->sglist[i >> 2].len[i & 3], DMA_TO_DEVICE); - i++; - } - - dev_kfree_skb_any(skb); - } - - iq->flush_index = fi; - netdev_tx_reset_queue(netdev_get_tx_queue(iq->netdev, iq->q_no)); -} - -/** - * octep_vf_clean_iqs() - Clean Tx queues to shutdown the device. - * - * @oct: Octeon device private data structure. - * - * Free the buffers in Tx queue descriptors pending completion and - * reset queue indices - */ -void octep_vf_clean_iqs(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < oct->num_iqs; i++) { - octep_vf_iq_free_pending(oct->iq[i]); - octep_vf_iq_reset_indices(oct->iq[i]); - } -} - -/** - * octep_vf_setup_iq() - Setup a Tx queue. - * - * @oct: Octeon device private data structure. - * @q_no: Tx queue number to be setup. - * - * Allocate resources for a Tx queue. - */ -static int octep_vf_setup_iq(struct octep_vf_device *oct, int q_no) -{ - u32 desc_ring_size, buff_info_size, sglist_size; - struct octep_vf_iq *iq; - int i; - - iq = vzalloc(sizeof(*iq)); - if (!iq) - goto iq_alloc_err; - oct->iq[q_no] = iq; - - iq->octep_vf_dev = oct; - iq->netdev = oct->netdev; - iq->dev = &oct->pdev->dev; - iq->q_no = q_no; - iq->max_count = CFG_GET_IQ_NUM_DESC(oct->conf); - iq->ring_size_mask = iq->max_count - 1; - iq->fill_threshold = CFG_GET_IQ_DB_MIN(oct->conf); - iq->netdev_q = netdev_get_tx_queue(iq->netdev, q_no); - - /* Allocate memory for hardware queue descriptors */ - desc_ring_size = OCTEP_VF_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf); - iq->desc_ring = dma_alloc_coherent(iq->dev, desc_ring_size, - &iq->desc_ring_dma, GFP_KERNEL); - if (unlikely(!iq->desc_ring)) { - dev_err(iq->dev, - "Failed to allocate DMA memory for IQ-%d\n", q_no); - goto desc_dma_alloc_err; - } - - /* Allocate memory for hardware SGLIST descriptors */ - sglist_size = OCTEP_VF_SGLIST_SIZE_PER_PKT * - CFG_GET_IQ_NUM_DESC(oct->conf); - iq->sglist = dma_alloc_coherent(iq->dev, sglist_size, - &iq->sglist_dma, GFP_KERNEL); - if (unlikely(!iq->sglist)) { - dev_err(iq->dev, - "Failed to allocate DMA memory for IQ-%d SGLIST\n", - q_no); - goto sglist_alloc_err; - } - - /* allocate memory to manage Tx packets pending completion */ - buff_info_size = OCTEP_VF_IQ_TXBUFF_INFO_SIZE * iq->max_count; - iq->buff_info = vzalloc(buff_info_size); - if (!iq->buff_info) { - dev_err(iq->dev, - "Failed to allocate buff info for IQ-%d\n", q_no); - goto buff_info_err; - } - - /* Setup sglist addresses in tx_buffer entries */ - for (i = 0; i < CFG_GET_IQ_NUM_DESC(oct->conf); i++) { - struct octep_vf_tx_buffer *tx_buffer; - - tx_buffer = &iq->buff_info[i]; - tx_buffer->sglist = - &iq->sglist[i * OCTEP_VF_SGLIST_ENTRIES_PER_PKT]; - tx_buffer->sglist_dma = - iq->sglist_dma + (i * OCTEP_VF_SGLIST_SIZE_PER_PKT); - } - - octep_vf_iq_reset_indices(iq); - oct->hw_ops.setup_iq_regs(oct, q_no); - - oct->num_iqs++; - return 0; - -buff_info_err: - dma_free_coherent(iq->dev, sglist_size, iq->sglist, iq->sglist_dma); -sglist_alloc_err: - dma_free_coherent(iq->dev, desc_ring_size, - iq->desc_ring, iq->desc_ring_dma); -desc_dma_alloc_err: - vfree(iq); - oct->iq[q_no] = NULL; -iq_alloc_err: - return -1; -} - -/** - * octep_vf_free_iq() - Free Tx queue resources. - * - * @iq: Octeon Tx queue data structure. - * - * Free all the resources allocated for a Tx queue. - */ -static void octep_vf_free_iq(struct octep_vf_iq *iq) -{ - struct octep_vf_device *oct = iq->octep_vf_dev; - u64 desc_ring_size, sglist_size; - int q_no = iq->q_no; - - desc_ring_size = OCTEP_VF_IQ_DESC_SIZE * CFG_GET_IQ_NUM_DESC(oct->conf); - - vfree(iq->buff_info); - - if (iq->desc_ring) - dma_free_coherent(iq->dev, desc_ring_size, - iq->desc_ring, iq->desc_ring_dma); - - sglist_size = OCTEP_VF_SGLIST_SIZE_PER_PKT * - CFG_GET_IQ_NUM_DESC(oct->conf); - if (iq->sglist) - dma_free_coherent(iq->dev, sglist_size, - iq->sglist, iq->sglist_dma); - - vfree(iq); - oct->iq[q_no] = NULL; - oct->num_iqs--; -} - -/** - * octep_vf_setup_iqs() - setup resources for all Tx queues. - * - * @oct: Octeon device private data structure. - */ -int octep_vf_setup_iqs(struct octep_vf_device *oct) -{ - int i; - - oct->num_iqs = 0; - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { - if (octep_vf_setup_iq(oct, i)) { - dev_err(&oct->pdev->dev, - "Failed to setup IQ(TxQ)-%d.\n", i); - goto iq_setup_err; - } - dev_dbg(&oct->pdev->dev, "Successfully setup IQ(TxQ)-%d.\n", i); - } - - return 0; - -iq_setup_err: - while (i) { - i--; - octep_vf_free_iq(oct->iq[i]); - } - return -1; -} - -/** - * octep_vf_free_iqs() - Free resources of all Tx queues. - * - * @oct: Octeon device private data structure. - */ -void octep_vf_free_iqs(struct octep_vf_device *oct) -{ - int i; - - for (i = 0; i < CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); i++) { - octep_vf_free_iq(oct->iq[i]); - dev_dbg(&oct->pdev->dev, - "Successfully destroyed IQ(TxQ)-%d.\n", i); - } - oct->num_iqs = 0; -} diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h deleted file mode 100644 index f338b975103c..000000000000 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_tx.h +++ /dev/null @@ -1,276 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Marvell Octeon EP (EndPoint) VF Ethernet Driver - * - * Copyright (C) 2020 Marvell. - * - */ - -#ifndef _OCTEP_VF_TX_H_ -#define _OCTEP_VF_TX_H_ - -#define IQ_SEND_OK 0 -#define IQ_SEND_STOP 1 -#define IQ_SEND_FAILED -1 - -#define TX_BUFTYPE_NONE 0 -#define TX_BUFTYPE_NET 1 -#define TX_BUFTYPE_NET_SG 2 -#define NUM_TX_BUFTYPES 3 - -/* Hardware format for Scatter/Gather list - * - * 63 48|47 32|31 16|15 0 - * ----------------------------------------- - * | Len 0 | Len 1 | Len 2 | Len 3 | - * ----------------------------------------- - * | Ptr 0 | - * ----------------------------------------- - * | Ptr 1 | - * ----------------------------------------- - * | Ptr 2 | - * ----------------------------------------- - * | Ptr 3 | - * ----------------------------------------- - */ -struct octep_vf_tx_sglist_desc { - u16 len[4]; - dma_addr_t dma_ptr[4]; -}; - -static_assert(sizeof(struct octep_vf_tx_sglist_desc) == 40); - -/* Each Scatter/Gather entry sent to hardwar hold four pointers. - * So, number of entries required is (MAX_SKB_FRAGS + 1)/4, where '+1' - * is for main skb which also goes as a gather buffer to Octeon hardware. - * To allocate sufficient SGLIST entries for a packet with max fragments, - * align by adding 3 before calcuating max SGLIST entries per packet. - */ -#define OCTEP_VF_SGLIST_ENTRIES_PER_PKT ((MAX_SKB_FRAGS + 1 + 3) / 4) -#define OCTEP_VF_SGLIST_SIZE_PER_PKT \ - (OCTEP_VF_SGLIST_ENTRIES_PER_PKT * sizeof(struct octep_vf_tx_sglist_desc)) - -struct octep_vf_tx_buffer { - struct sk_buff *skb; - dma_addr_t dma; - struct octep_vf_tx_sglist_desc *sglist; - dma_addr_t sglist_dma; - u8 gather; -}; - -#define OCTEP_VF_IQ_TXBUFF_INFO_SIZE (sizeof(struct octep_vf_tx_buffer)) - -/* VF Hardware interface Tx statistics */ -struct octep_vf_iface_tx_stats { - /* Total frames sent on the interface */ - u64 pkts; - - /* Total octets sent on the interface */ - u64 octs; - - /* Packets sent to a broadcast DMAC */ - u64 bcst; - - /* Packets sent to the multicast DMAC */ - u64 mcst; - - /* Packets dropped */ - u64 dropped; - - /* Reserved */ - u64 reserved[13]; -}; - -/* VF Input Queue statistics */ -struct octep_vf_iq_stats { - /* Instructions posted to this queue. */ - u64 instr_posted; - - /* Instructions copied by hardware for processing. */ - u64 instr_completed; - - /* Instructions that could not be processed. */ - u64 instr_dropped; - - /* Bytes sent through this queue. */ - u64 bytes_sent; - - /* Gather entries sent through this queue. */ - u64 sgentry_sent; - - /* Number of transmit failures due to TX_BUSY */ - u64 tx_busy; - - /* Number of times the queue is restarted */ - u64 restart_cnt; -}; - -/* The instruction (input) queue. - * The input queue is used to post raw (instruction) mode data or packet - * data to Octeon device from the host. Each input queue (up to 4) for - * a Octeon device has one such structure to represent it. - */ -struct octep_vf_iq { - u32 q_no; - - struct octep_vf_device *octep_vf_dev; - struct net_device *netdev; - struct device *dev; - struct netdev_queue *netdev_q; - - /* Index in input ring where driver should write the next packet */ - u16 host_write_index; - - /* Index in input ring where Octeon is expected to read next packet */ - u16 octep_vf_read_index; - - /* This index aids in finding the window in the queue where Octeon - * has read the commands. - */ - u16 flush_index; - - /* Statistics for this input queue. */ - struct octep_vf_iq_stats stats; - - /* Pointer to the Virtual Base addr of the input ring. */ - struct octep_vf_tx_desc_hw *desc_ring; - - /* DMA mapped base address of the input descriptor ring. */ - dma_addr_t desc_ring_dma; - - /* Info of Tx buffers pending completion. */ - struct octep_vf_tx_buffer *buff_info; - - /* Base pointer to Scatter/Gather lists for all ring descriptors. */ - struct octep_vf_tx_sglist_desc *sglist; - - /* DMA mapped addr of Scatter Gather Lists */ - dma_addr_t sglist_dma; - - /* Octeon doorbell register for the ring. */ - u8 __iomem *doorbell_reg; - - /* Octeon instruction count register for this ring. */ - u8 __iomem *inst_cnt_reg; - - /* interrupt level register for this ring */ - u8 __iomem *intr_lvl_reg; - - /* Maximum no. of instructions in this queue. */ - u32 max_count; - u32 ring_size_mask; - - u32 pkt_in_done; - u32 pkts_processed; - - u32 status; - - /* Number of instructions pending to be posted to Octeon. */ - u32 fill_cnt; - - /* The max. number of instructions that can be held pending by the - * driver before ringing doorbell. - */ - u32 fill_threshold; -}; - -/* Hardware Tx Instruction Header */ -struct octep_vf_instr_hdr { - /* Data Len */ - u64 tlen:16; - - /* Reserved */ - u64 rsvd:20; - - /* PKIND for SDP */ - u64 pkind:6; - - /* Front Data size */ - u64 fsz:6; - - /* No. of entries in gather list */ - u64 gsz:14; - - /* Gather indicator 1=gather*/ - u64 gather:1; - - /* Reserved3 */ - u64 reserved3:1; -}; - -static_assert(sizeof(struct octep_vf_instr_hdr) == 8); - -/* Tx offload flags */ -#define OCTEP_VF_TX_OFFLOAD_VLAN_INSERT BIT(0) -#define OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM BIT(1) -#define OCTEP_VF_TX_OFFLOAD_UDP_CKSUM BIT(2) -#define OCTEP_VF_TX_OFFLOAD_TCP_CKSUM BIT(3) -#define OCTEP_VF_TX_OFFLOAD_SCTP_CKSUM BIT(4) -#define OCTEP_VF_TX_OFFLOAD_TCP_TSO BIT(5) -#define OCTEP_VF_TX_OFFLOAD_UDP_TSO BIT(6) - -#define OCTEP_VF_TX_OFFLOAD_CKSUM (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \ - OCTEP_VF_TX_OFFLOAD_UDP_CKSUM | \ - OCTEP_VF_TX_OFFLOAD_TCP_CKSUM) - -#define OCTEP_VF_TX_OFFLOAD_TSO (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \ - OCTEP_VF_TX_OFFLOAD_UDP_TSO) - -#define OCTEP_VF_TX_IP_CSUM(flags) ((flags) & \ - (OCTEP_VF_TX_OFFLOAD_IPV4_CKSUM | \ - OCTEP_VF_TX_OFFLOAD_TCP_CKSUM | \ - OCTEP_VF_TX_OFFLOAD_UDP_CKSUM)) - -#define OCTEP_VF_TX_TSO(flags) ((flags) & \ - (OCTEP_VF_TX_OFFLOAD_TCP_TSO | \ - OCTEP_VF_TX_OFFLOAD_UDP_TSO)) - -struct tx_mdata { - /* offload flags */ - u16 ol_flags; - - /* gso size */ - u16 gso_size; - - /* gso flags */ - u16 gso_segs; - - /* reserved */ - u16 rsvd1; - - /* reserved */ - u64 rsvd2; -}; - -static_assert(sizeof(struct tx_mdata) == 16); - -/* 64-byte Tx instruction format. - * Format of instruction for a 64-byte mode input queue. - * - * only first 16-bytes (dptr and ih) are mandatory; rest are optional - * and filled by the driver based on firmware/hardware capabilities. - * These optional headers together called Front Data and its size is - * described by ih->fsz. - */ -struct octep_vf_tx_desc_hw { - /* Pointer where the input data is available. */ - u64 dptr; - - /* Instruction Header. */ - union { - struct octep_vf_instr_hdr ih; - u64 ih64; - }; - - union { - u64 txm64[2]; - struct tx_mdata txm; - }; - - /* Additional headers available in a 64-byte instruction. */ - u64 exhdr[4]; -}; - -static_assert(sizeof(struct octep_vf_tx_desc_hw) == 64); - -#define OCTEP_VF_IQ_DESC_SIZE (sizeof(struct octep_vf_tx_desc_hw)) -#endif /* _OCTEP_VF_TX_H_ */ -- cgit v1.3.1 From fe1eb24bd5ade085914248c527044e942f75e06a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 4 Jan 2024 16:04:35 -0800 Subject: Revert "Introduce PHY listing and link_topology tracking" This reverts commit 32bb4515e34469975abc936deb0a116c4a445817. This reverts commit d078d480639a4f3b5fc2d56247afa38e0956483a. This reverts commit fcc4b105caa4b844bf043375bf799c20a9c99db1. This reverts commit 345237dbc1bdbb274c9fb9ec38976261ff4a40b8. This reverts commit 7db69ec9cfb8b4ab50420262631fb2d1908b25bf. This reverts commit 95132a018f00f5dad38bdcfd4180d1af955d46f6. This reverts commit 63d5eaf35ac36cad00cfb3809d794ef0078c822b. This reverts commit c29451aefcb42359905d18678de38e52eccb3bb5. This reverts commit 2ab0edb505faa9ac90dee1732571390f074e8113. This reverts commit dedd702a35793ab462fce4c737eeba0badf9718e. This reverts commit 034fcc210349b873ece7356905be5c6ca11eef2a. This reverts commit 9c5625f559ad6fe9f6f733c11475bf470e637d34. This reverts commit 02018c544ef113e980a2349eba89003d6f399d22. Looks like we need more time for reviews, and incremental changes will be hard to make sense of. So revert. Link: https://lore.kernel.org/all/ZZP6FV5sXEf+xd58@shell.armlinux.org.uk/ Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/ethtool.yaml | 68 ------ Documentation/networking/ethtool-netlink.rst | 51 ----- Documentation/networking/index.rst | 1 - Documentation/networking/phy-link-topology.rst | 121 ---------- MAINTAINERS | 2 - drivers/net/phy/Makefile | 2 +- drivers/net/phy/at803x.c | 2 - drivers/net/phy/marvell-88x2222.c | 2 - drivers/net/phy/marvell.c | 2 - drivers/net/phy/marvell10g.c | 2 - drivers/net/phy/phy_device.c | 55 ----- drivers/net/phy/phy_link_topology.c | 66 ------ drivers/net/phy/phylink.c | 3 +- drivers/net/phy/sfp-bus.c | 15 +- include/linux/netdevice.h | 4 +- include/linux/phy.h | 6 - include/linux/phy_link_topology.h | 67 ------ include/linux/phy_link_topology_core.h | 19 -- include/linux/sfp.h | 8 +- include/uapi/linux/ethtool.h | 16 -- include/uapi/linux/ethtool_netlink.h | 30 --- net/core/dev.c | 3 - net/ethtool/Makefile | 2 +- net/ethtool/cabletest.c | 12 +- net/ethtool/netlink.c | 33 --- net/ethtool/netlink.h | 12 +- net/ethtool/phy.c | 306 ------------------------- net/ethtool/plca.c | 13 +- net/ethtool/pse-pd.c | 9 +- net/ethtool/strset.c | 15 +- 30 files changed, 35 insertions(+), 912 deletions(-) delete mode 100644 Documentation/networking/phy-link-topology.rst delete mode 100644 drivers/net/phy/phy_link_topology.c delete mode 100644 include/linux/phy_link_topology.h delete mode 100644 include/linux/phy_link_topology_core.h delete mode 100644 net/ethtool/phy.c (limited to 'Documentation') diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 7f6fb1f61dd4..197208f419dc 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -16,11 +16,6 @@ definitions: name: stringset type: enum entries: [] - - - name: phy-upstream-type - enum-name: - type: enum - entries: [ mac, phy ] attribute-sets: - @@ -35,9 +30,6 @@ attribute-sets: - name: flags type: u32 - - - name: phy-index - type: u32 - name: bitset-bit @@ -950,45 +942,6 @@ attribute-sets: - name: burst-tmr type: u32 - - - name: phy-upstream - attributes: - - - name: index - type: u32 - - - name: sfp-name - type: string - - - name: phy - attributes: - - - name: header - type: nest - nested-attributes: header - - - name: index - type: u32 - - - name: drvname - type: string - - - name: name - type: string - - - name: upstream-type - type: u8 - enum: phy-upstream-type - - - name: upstream - type: nest - nested-attributes: phy-upstream - - - name: downstream-sfp-name - type: string - - - name: id - type: u32 operations: enum-model: directional @@ -1740,24 +1693,3 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get - - - name: phy-get - doc: Get PHY devices attached to an interface - - attribute-set: phy - - do: &phy-get-op - request: - attributes: - - header - reply: - attributes: - - header - - index - - drvname - - name - - upstream-type - - upstream - - downstream-sfp-name - - id - dump: *phy-get-op diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 97ff787a7dd8..d583d9abf2f8 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -57,7 +57,6 @@ Structure of this header is ``ETHTOOL_A_HEADER_DEV_INDEX`` u32 device ifindex ``ETHTOOL_A_HEADER_DEV_NAME`` string device name ``ETHTOOL_A_HEADER_FLAGS`` u32 flags common for all requests - ``ETHTOOL_A_HEADER_PHY_INDEX`` u32 phy device index ============================== ====== ============================= ``ETHTOOL_A_HEADER_DEV_INDEX`` and ``ETHTOOL_A_HEADER_DEV_NAME`` identify the @@ -82,12 +81,6 @@ the behaviour is backward compatible, i.e. requests from old clients not aware of the flag should be interpreted the way the client expects. A client must not set flags it does not understand. -``ETHTOOL_A_HEADER_PHY_INDEX`` identify the ethernet PHY the message relates to. -As there are numerous commands that are related to PHY configuration, and because -we can have more than one PHY on the link, the PHY index can be passed in the -request for the commands that needs it. It is however not mandatory, and if it -is not passed for commands that target a PHY, the net_device.phydev pointer -is used, as a fallback that keeps the legacy behaviour. Bit sets ======== @@ -2011,49 +2004,6 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg -PHY_GET -======= - -Retrieve information about a given Ethernet PHY sitting on the link. As there -can be more than one PHY, the DUMP operation can be used to list the PHYs -present on a given interface, by passing an interface index or name in -the dump request - -Request contents: - - ==================================== ====== ========================== - ``ETHTOOL_A_PHY_HEADER`` nested request header - ==================================== ====== ========================== - -Kernel response contents: - - ===================================== ====== ========================== - ``ETHTOOL_A_PHY_HEADER`` nested request header - ``ETHTOOL_A_PHY_INDEX`` u32 the phy's unique index, that can - be used for phy-specific requests - ``ETHTOOL_A_PHY_DRVNAME`` string the phy driver name - ``ETHTOOL_A_PHY_NAME`` string the phy device name - ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` u32 the type of device this phy is - connected to - ``ETHTOOL_A_PHY_UPSTREAM_PHY`` nested if the phy is connected to another - phy, this nest contains info on - that connection - ``ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME`` string if the phy controls an sfp bus, - the name of the sfp bus - ``ETHTOOL_A_PHY_ID`` u32 the phy id if the phy is C22 - ===================================== ====== ========================== - -When ``ETHTOOL_A_PHY_UPSTREAM_TYPE`` is PHY_UPSTREAM_PHY, the PHY's parent is -another PHY. Information on the parent PHY will be set in the -``ETHTOOL_A_PHY_UPSTREAM_PHY`` nest, which has the following structure : - - =================================== ====== ========================== - ``ETHTOOL_A_PHY_UPSTREAM_INDEX`` u32 the PHY index of the upstream PHY - ``ETHTOOL_A_PHY_UPSTREAM_SFP_NAME`` string if this PHY is connected to it's - parent PHY through an SFP bus, the - name of this sfp bus - =================================== ====== ========================== - Request translation =================== @@ -2160,5 +2110,4 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` - n/a ``ETHTOOL_MSG_PHY_GET`` =================================== ===================================== diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index a2c45a75a4a6..69f3d6dcd9fd 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -88,7 +88,6 @@ Contents: operstates packet_mmap phonet - phy-link-topology pktgen plip ppp_generic diff --git a/Documentation/networking/phy-link-topology.rst b/Documentation/networking/phy-link-topology.rst deleted file mode 100644 index 1fd8e904ef4b..000000000000 --- a/Documentation/networking/phy-link-topology.rst +++ /dev/null @@ -1,121 +0,0 @@ -.. SPDX-License-Identifier: GPL-2.0 - -================= -PHY link topology -================= - -Overview -======== - -The PHY link topology representation in the networking stack aims at representing -the hardware layout for any given Ethernet link. - -An Ethernet Interface from userspace's point of view is nothing but a -:c:type:`struct net_device `, which exposes configuration options -through the legacy ioctls and the ethool netlink commands. The base assumption -when designing these configuration channels were that the link looked -something like this :: - - +-----------------------+ +----------+ +--------------+ - | Ethernet Controller / | | Ethernet | | Connector / | - | MAC | ------ | PHY | ---- | Port | ---... to LP - +-----------------------+ +----------+ +--------------+ - struct net_device struct phy_device - -Commands that needs to configure the PHY will go through the net_device.phydev -field to reach the PHY and perform the relevant configuration. - -This assumption falls apart in more complex topologies that can arise when, -for example, using SFP transceivers (although that's not the only specific case). - -Here, we have 2 basic scenarios. Either the MAC is able to output a serialized -interface, that can directly be fed to an SFP cage, such as SGMII, 1000BaseX, -10GBaseR, etc. - -The link topology then looks like this (when an SFP module is inserted) :: - - +-----+ SGMII +------------+ - | MAC | ------- | SFP Module | - +-----+ +------------+ - -Knowing that some modules embed a PHY, the actual link is more like :: - - +-----+ SGMII +--------------+ - | MAC | -------- | PHY (on SFP) | - +-----+ +--------------+ - -In this case, the SFP PHY is handled by phylib, and registered by phylink through -its SFP upstream ops. - -Now some Ethernet controllers aren't able to output a serialized interface, so -we can't directly connect them to an SFP cage. However, some PHYs can be used -as media-converters, to translate the non-serialized MAC MII interface to a -serialized MII interface fed to the SFP :: - - +-----+ RGMII +-----------------------+ SGMII +--------------+ - | MAC | ------- | PHY (media converter) | ------- | PHY (on SFP) | - +-----+ +-----------------------+ +--------------+ - -This is where the model of having a single net_device.phydev pointer shows its -limitations, as we now have 2 PHYs on the link. - -The phy_link topology framework aims at providing a way to keep track of every -PHY on the link, for use by both kernel drivers and subsystems, but also to -report the topology to userspace, allowing to target individual PHYs in configuration -commands. - -API -=== - -The :c:type:`struct phy_link_topology ` is a per-netdevice -resource, that gets initialized at netdevice creation. Once it's initialized, -it is then possible to register PHYs to the topology through : - -:c:func:`phy_link_topo_add_phy` - -Besides registering the PHY to the topology, this call will also assign a unique -index to the PHY, which can then be reported to userspace to refer to this PHY -(akin to the ifindex). This index is a u32, ranging from 1 to U32_MAX. The value -0 is reserved to indicate the PHY doesn't belong to any topology yet. - -The PHY can then be removed from the topology through - -:c:func:`phy_link_topo_del_phy` - -These function are already hooked into the phylib subsystem, so all PHYs that -are linked to a net_device through :c:func:`phy_attach_direct` will automatically -join the netdev's topology. - -PHYs that are on a SFP module will also be automatically registered IF the SFP -upstream is phylink (so, no media-converter). - -PHY drivers that can be used as SFP upstream need to call :c:func:`phy_sfp_attach_phy` -and :c:func:`phy_sfp_detach_phy`, which can be used as a -.attach_phy / .detach_phy implementation for the -:c:type:`struct sfp_upstream_ops `. - -UAPI -==== - -There exist a set of netlink commands to query the link topology from userspace, -see ``Documentation/networking/ethtool-netlink.rst``. - -The whole point of having a topology representation is to assign the phyindex -field in :c:type:`struct phy_device `. This index is reported to -userspace using the ``ETHTOOL_MSG_PHY_GET`` ethtnl command. Performing a DUMP operation -will result in all PHYs from all net_device being listed. The DUMP command -accepts either a ``ETHTOOL_A_HEADER_DEV_INDEX`` or ``ETHTOOL_A_HEADER_DEV_NAME`` -to be passed in the request to filter the DUMP to a single net_device. - -The retrieved index can then be passed as a request parameter using the -``ETHTOOL_A_HEADER_PHY_INDEX`` field in the following ethnl commands : - -* ``ETHTOOL_MSG_STRSET_GET`` to get the stats string set from a given PHY -* ``ETHTOOL_MSG_CABLE_TEST_ACT`` and ``ETHTOOL_MSG_CABLE_TEST_ACT``, to perform - cable testing on a given PHY on the link (most likely the outermost PHY) -* ``ETHTOOL_MSG_PSE_SET`` and ``ETHTOOL_MSG_PSE_GET`` for PHY-controlled PoE and PSE settings -* ``ETHTOOL_MSG_PLCA_GET_CFG``, ``ETHTOOL_MSG_PLCA_SET_CFG`` and ``ETHTOOL_MSG_PLCA_GET_STATUS`` - to set the PLCA (Physical Layer Collision Avoidance) parameters - -Note that the PHY index can be passed to other requests, which will silently -ignore it if present and irrelevant. diff --git a/MAINTAINERS b/MAINTAINERS index 79ac49b113dc..2b916990d7f0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7871,8 +7871,6 @@ F: include/linux/mii.h F: include/linux/of_net.h F: include/linux/phy.h F: include/linux/phy_fixed.h -F: include/linux/phy_link_topology.h -F: include/linux/phy_link_topology_core.h F: include/linux/phylib_stubs.h F: include/linux/platform_data/mdio-bcm-unimac.h F: include/linux/platform_data/mdio-gpio.h diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index f218954fd7a8..6097afd44392 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -2,7 +2,7 @@ # Makefile for Linux PHY drivers libphy-y := phy.o phy-c45.o phy-core.o phy_device.o \ - linkmode.o phy_link_topology.o + linkmode.o mdio-bus-y += mdio_bus.o mdio_device.o ifdef CONFIG_MDIO_DEVICE diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c index aaf6c654aaed..19cfbf36fe80 100644 --- a/drivers/net/phy/at803x.c +++ b/drivers/net/phy/at803x.c @@ -1452,8 +1452,6 @@ static const struct sfp_upstream_ops at8031_sfp_ops = { .attach = phy_sfp_attach, .detach = phy_sfp_detach, .module_insert = at8031_sfp_insert, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int at8031_parse_dt(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell-88x2222.c b/drivers/net/phy/marvell-88x2222.c index 3f77bbc7e04f..e3aa30dad2e6 100644 --- a/drivers/net/phy/marvell-88x2222.c +++ b/drivers/net/phy/marvell-88x2222.c @@ -555,8 +555,6 @@ static const struct sfp_upstream_ops sfp_phy_ops = { .link_down = mv2222_sfp_link_down, .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int mv2222_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 674e29bce2cc..eba652a4c1d8 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -3254,8 +3254,6 @@ static const struct sfp_upstream_ops m88e1510_sfp_ops = { .module_remove = m88e1510_sfp_remove, .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, }; static int m88e1510_probe(struct phy_device *phydev) diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c index 6642eb642d4b..ad43e280930c 100644 --- a/drivers/net/phy/marvell10g.c +++ b/drivers/net/phy/marvell10g.c @@ -503,8 +503,6 @@ static int mv3310_sfp_insert(void *upstream, const struct sfp_eeprom_id *id) static const struct sfp_upstream_ops mv3310_sfp_ops = { .attach = phy_sfp_attach, .detach = phy_sfp_detach, - .connect_phy = phy_sfp_connect_phy, - .disconnect_phy = phy_sfp_disconnect_phy, .module_insert = mv3310_sfp_insert, }; diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1e595762afea..3611ea64875e 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -266,14 +265,6 @@ static void phy_mdio_device_remove(struct mdio_device *mdiodev) static struct phy_driver genphy_driver; -static struct phy_link_topology *phy_get_link_topology(struct phy_device *phydev) -{ - if (phydev->attached_dev) - return &phydev->attached_dev->link_topo; - - return NULL; -} - static LIST_HEAD(phy_fixup_list); static DEFINE_MUTEX(phy_fixup_lock); @@ -1363,46 +1354,6 @@ phy_standalone_show(struct device *dev, struct device_attribute *attr, } static DEVICE_ATTR_RO(phy_standalone); -/** - * phy_sfp_connect_phy - Connect the SFP module's PHY to the upstream PHY - * @upstream: pointer to the upstream phy device - * @phy: pointer to the SFP module's phy device - * - * This helper allows keeping track of PHY devices on the link. It adds the - * SFP module's phy to the phy namespace of the upstream phy - */ -int phy_sfp_connect_phy(void *upstream, struct phy_device *phy) -{ - struct phy_device *phydev = upstream; - struct phy_link_topology *topo = phy_get_link_topology(phydev); - - if (topo) - return phy_link_topo_add_phy(topo, phy, PHY_UPSTREAM_PHY, phydev); - - return 0; -} -EXPORT_SYMBOL(phy_sfp_connect_phy); - -/** - * phy_sfp_disconnect_phy - Disconnect the SFP module's PHY from the upstream PHY - * @upstream: pointer to the upstream phy device - * @phy: pointer to the SFP module's phy device - * - * This helper allows keeping track of PHY devices on the link. It removes the - * SFP module's phy to the phy namespace of the upstream phy. As the module phy - * will be destroyed, re-inserting the same module will add a new phy with a - * new index. - */ -void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy) -{ - struct phy_device *phydev = upstream; - struct phy_link_topology *topo = phy_get_link_topology(phydev); - - if (topo) - phy_link_topo_del_phy(topo, phy); -} -EXPORT_SYMBOL(phy_sfp_disconnect_phy); - /** * phy_sfp_attach - attach the SFP bus to the PHY upstream network device * @upstream: pointer to the phy device @@ -1540,11 +1491,6 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, if (phydev->sfp_bus_attached) dev->sfp_bus = phydev->sfp_bus; - - err = phy_link_topo_add_phy(&dev->link_topo, phydev, - PHY_UPSTREAM_MAC, dev); - if (err) - goto error; } /* Some Ethernet drivers try to connect to a PHY device before @@ -1874,7 +1820,6 @@ void phy_detach(struct phy_device *phydev) if (dev) { phydev->attached_dev->phydev = NULL; phydev->attached_dev = NULL; - phy_link_topo_del_phy(&dev->link_topo, phydev); } phydev->phylink = NULL; diff --git a/drivers/net/phy/phy_link_topology.c b/drivers/net/phy/phy_link_topology.c deleted file mode 100644 index 34e7e08fbfc3..000000000000 --- a/drivers/net/phy/phy_link_topology.c +++ /dev/null @@ -1,66 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Infrastructure to handle all PHY devices connected to a given netdev, - * either directly or indirectly attached. - * - * Copyright (c) 2023 Maxime Chevallier - */ - -#include -#include -#include -#include -#include - -int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream) -{ - struct phy_device_node *pdn; - int ret; - - pdn = kzalloc(sizeof(*pdn), GFP_KERNEL); - if (!pdn) - return -ENOMEM; - - pdn->phy = phy; - switch (upt) { - case PHY_UPSTREAM_MAC: - pdn->upstream.netdev = (struct net_device *)upstream; - if (phy_on_sfp(phy)) - pdn->parent_sfp_bus = pdn->upstream.netdev->sfp_bus; - break; - case PHY_UPSTREAM_PHY: - pdn->upstream.phydev = (struct phy_device *)upstream; - if (phy_on_sfp(phy)) - pdn->parent_sfp_bus = pdn->upstream.phydev->sfp_bus; - break; - default: - ret = -EINVAL; - goto err; - } - pdn->upstream_type = upt; - - ret = xa_alloc_cyclic(&topo->phys, &phy->phyindex, pdn, xa_limit_32b, - &topo->next_phy_index, GFP_KERNEL); - if (ret) - goto err; - - return 0; - -err: - kfree(pdn); - return ret; -} -EXPORT_SYMBOL_GPL(phy_link_topo_add_phy); - -void phy_link_topo_del_phy(struct phy_link_topology *topo, - struct phy_device *phy) -{ - struct phy_device_node *pdn = xa_erase(&topo->phys, phy->phyindex); - - phy->phyindex = 0; - - kfree(pdn); -} -EXPORT_SYMBOL_GPL(phy_link_topo_del_phy); diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index a816391add12..ed0b4ccaa6a6 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -3385,8 +3385,7 @@ static int phylink_sfp_connect_phy(void *upstream, struct phy_device *phy) return ret; } -static void phylink_sfp_disconnect_phy(void *upstream, - struct phy_device *phydev) +static void phylink_sfp_disconnect_phy(void *upstream) { phylink_disconnect_phy(upstream); } diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index fb1c102714b5..6fa679b36290 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -486,7 +486,7 @@ static void sfp_unregister_bus(struct sfp_bus *bus) bus->socket_ops->stop(bus->sfp); bus->socket_ops->detach(bus->sfp); if (bus->phydev && ops && ops->disconnect_phy) - ops->disconnect_phy(bus->upstream, bus->phydev); + ops->disconnect_phy(bus->upstream); } bus->registered = false; } @@ -742,7 +742,7 @@ void sfp_remove_phy(struct sfp_bus *bus) const struct sfp_upstream_ops *ops = sfp_get_upstream_ops(bus); if (ops && ops->disconnect_phy) - ops->disconnect_phy(bus->upstream, bus->phydev); + ops->disconnect_phy(bus->upstream); bus->phydev = NULL; } EXPORT_SYMBOL_GPL(sfp_remove_phy); @@ -859,14 +859,3 @@ void sfp_unregister_socket(struct sfp_bus *bus) sfp_bus_put(bus); } EXPORT_SYMBOL_GPL(sfp_unregister_socket); - -const char *sfp_get_name(struct sfp_bus *bus) -{ - ASSERT_RTNL(); - - if (bus->sfp_dev) - return dev_name(bus->sfp_dev); - - return NULL; -} -EXPORT_SYMBOL_GPL(sfp_get_name); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e265aa1f2169..118c40258d07 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -40,6 +40,7 @@ #include #endif #include + #include #include #include @@ -51,7 +52,6 @@ #include #include #include -#include struct netpoll_info; struct device; @@ -2047,7 +2047,6 @@ enum netdev_stat_type { * @fcoe_ddp_xid: Max exchange id for FCoE LRO by ddp * * @priomap: XXX: need comments on this one - * @link_topo: Physical link topology tracking attached PHYs * @phydev: Physical device may attach itself * for hardware timestamping * @sfp_bus: attached &struct sfp_bus structure. @@ -2442,7 +2441,6 @@ struct net_device { #if IS_ENABLED(CONFIG_CGROUP_NET_PRIO) struct netprio_map __rcu *priomap; #endif - struct phy_link_topology link_topo; struct phy_device *phydev; struct sfp_bus *sfp_bus; struct lock_class_key *qdisc_tx_busylock; diff --git a/include/linux/phy.h b/include/linux/phy.h index 6cb9d843aee9..e9e85d347587 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -544,9 +544,6 @@ struct macsec_ops; * @drv: Pointer to the driver for this PHY instance * @devlink: Create a link between phy dev and mac dev, if the external phy * used by current mac interface is managed by another mac interface. - * @phyindex: Unique id across the phy's parent tree of phys to address the PHY - * from userspace, similar to ifindex. A zero index means the PHY - * wasn't assigned an id yet. * @phy_id: UID for this device found during discovery * @c45_ids: 802.3-c45 Device Identifiers if is_c45. * @is_c45: Set to true if this PHY uses clause 45 addressing. @@ -646,7 +643,6 @@ struct phy_device { struct device_link *devlink; - u32 phyindex; u32 phy_id; struct phy_c45_device_ids c45_ids; @@ -1726,8 +1722,6 @@ int phy_suspend(struct phy_device *phydev); int phy_resume(struct phy_device *phydev); int __phy_resume(struct phy_device *phydev); int phy_loopback(struct phy_device *phydev, bool enable); -int phy_sfp_connect_phy(void *upstream, struct phy_device *phy); -void phy_sfp_disconnect_phy(void *upstream, struct phy_device *phy); void phy_sfp_attach(void *upstream, struct sfp_bus *bus); void phy_sfp_detach(void *upstream, struct sfp_bus *bus); int phy_sfp_probe(struct phy_device *phydev, diff --git a/include/linux/phy_link_topology.h b/include/linux/phy_link_topology.h deleted file mode 100644 index 91902263ec0e..000000000000 --- a/include/linux/phy_link_topology.h +++ /dev/null @@ -1,67 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * PHY device list allow maintaining a list of PHY devices that are - * part of a netdevice's link topology. PHYs can for example be chained, - * as is the case when using a PHY that exposes an SFP module, on which an - * SFP transceiver that embeds a PHY is connected. - * - * This list can then be used by userspace to leverage individual PHY - * capabilities. - */ -#ifndef __PHY_LINK_TOPOLOGY_H -#define __PHY_LINK_TOPOLOGY_H - -#include -#include - -struct xarray; -struct phy_device; -struct net_device; -struct sfp_bus; - -struct phy_device_node { - enum phy_upstream upstream_type; - - union { - struct net_device *netdev; - struct phy_device *phydev; - } upstream; - - struct sfp_bus *parent_sfp_bus; - - struct phy_device *phy; -}; - -static inline struct phy_device * -phy_link_topo_get_phy(struct phy_link_topology *topo, u32 phyindex) -{ - struct phy_device_node *pdn = xa_load(&topo->phys, phyindex); - - if (pdn) - return pdn->phy; - - return NULL; -} - -#if IS_ENABLED(CONFIG_PHYLIB) -int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream); - -void phy_link_topo_del_phy(struct phy_link_topology *lt, struct phy_device *phy); - -#else -static inline int phy_link_topo_add_phy(struct phy_link_topology *topo, - struct phy_device *phy, - enum phy_upstream upt, void *upstream) -{ - return 0; -} - -static inline void phy_link_topo_del_phy(struct phy_link_topology *topo, - struct phy_device *phy) -{ -} -#endif - -#endif /* __PHY_LINK_TOPOLOGY_H */ diff --git a/include/linux/phy_link_topology_core.h b/include/linux/phy_link_topology_core.h deleted file mode 100644 index 78c75f909489..000000000000 --- a/include/linux/phy_link_topology_core.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PHY_LINK_TOPOLOGY_CORE_H -#define __PHY_LINK_TOPOLOGY_CORE_H - -struct xarray; - -struct phy_link_topology { - struct xarray phys; - - u32 next_phy_index; -}; - -static inline void phy_link_topo_init(struct phy_link_topology *topo) -{ - xa_init_flags(&topo->phys, XA_FLAGS_ALLOC1); - topo->next_phy_index = 1; -} - -#endif /* __PHY_LINK_TOPOLOGY_CORE_H */ diff --git a/include/linux/sfp.h b/include/linux/sfp.h index 55c0ab17c9e2..9346cd44814d 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -544,7 +544,7 @@ struct sfp_upstream_ops { void (*link_down)(void *priv); void (*link_up)(void *priv); int (*connect_phy)(void *priv, struct phy_device *); - void (*disconnect_phy)(void *priv, struct phy_device *); + void (*disconnect_phy)(void *priv); }; #if IS_ENABLED(CONFIG_SFP) @@ -570,7 +570,6 @@ struct sfp_bus *sfp_bus_find_fwnode(const struct fwnode_handle *fwnode); int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, const struct sfp_upstream_ops *ops); void sfp_bus_del_upstream(struct sfp_bus *bus); -const char *sfp_get_name(struct sfp_bus *bus); #else static inline int sfp_parse_port(struct sfp_bus *bus, const struct sfp_eeprom_id *id, @@ -649,11 +648,6 @@ static inline int sfp_bus_add_upstream(struct sfp_bus *bus, void *upstream, static inline void sfp_bus_del_upstream(struct sfp_bus *bus) { } - -static inline const char *sfp_get_name(struct sfp_bus *bus) -{ - return NULL; -} #endif #endif diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 01ba529dbb6d..06ef6b78b7de 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2220,20 +2220,4 @@ struct ethtool_link_settings { * __u32 map_lp_advertising[link_mode_masks_nwords]; */ }; - -/** - * enum phy_upstream - Represents the upstream component a given PHY device - * is connected to, as in what is on the other end of the MII bus. Most PHYs - * will be attached to an Ethernet MAC controller, but in some cases, there's - * an intermediate PHY used as a media-converter, which will driver another - * MII interface as its output. - * @PHY_UPSTREAM_MAC: Upstream component is a MAC (a switch port, - * or ethernet controller) - * @PHY_UPSTREAM_PHY: Upstream component is a PHY (likely a media converter) - */ -enum phy_upstream { - PHY_UPSTREAM_MAC, - PHY_UPSTREAM_PHY, -}; - #endif /* _UAPI_LINUX_ETHTOOL_H */ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 00cd7ad16709..3f89074aa06c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,7 +57,6 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, - ETHTOOL_MSG_PHY_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -110,8 +109,6 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_MSG_PHY_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -136,7 +133,6 @@ enum { ETHTOOL_A_HEADER_DEV_INDEX, /* u32 */ ETHTOOL_A_HEADER_DEV_NAME, /* string */ ETHTOOL_A_HEADER_FLAGS, /* u32 - ETHTOOL_FLAG_* */ - ETHTOOL_A_HEADER_PHY_INDEX, /* u32 */ /* add new constants above here */ __ETHTOOL_A_HEADER_CNT, @@ -980,32 +976,6 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; -enum { - ETHTOOL_A_PHY_UPSTREAM_UNSPEC, - ETHTOOL_A_PHY_UPSTREAM_INDEX, /* u32 */ - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, /* string */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_UPSTREAM_CNT, - ETHTOOL_A_PHY_UPSTREAM_MAX = (__ETHTOOL_A_PHY_UPSTREAM_CNT - 1) -}; - -enum { - ETHTOOL_A_PHY_UNSPEC, - ETHTOOL_A_PHY_HEADER, /* nest - _A_HEADER_* */ - ETHTOOL_A_PHY_INDEX, /* u32 */ - ETHTOOL_A_PHY_DRVNAME, /* string */ - ETHTOOL_A_PHY_NAME, /* string */ - ETHTOOL_A_PHY_UPSTREAM_TYPE, /* u8 */ - ETHTOOL_A_PHY_UPSTREAM, /* nest - _A_PHY_UPSTREAM_* */ - ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, /* string */ - ETHTOOL_A_PHY_ID, /* u32 */ - - /* add new constants above here */ - __ETHTOOL_A_PHY_CNT, - ETHTOOL_A_PHY_MAX = (__ETHTOOL_A_PHY_CNT - 1) -}; - /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/core/dev.c b/net/core/dev.c index bc4ac49d4643..f01a9b858347 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -153,7 +153,6 @@ #include #include #include -#include #include "dev.h" #include "net-sysfs.h" @@ -10876,8 +10875,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, #ifdef CONFIG_NET_SCHED hash_init(dev->qdisc_hash); #endif - phy_link_topo_init(&dev->link_topo); - dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 0ccd0e9afd3f..504f954a1b28 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o phy.o + module.o pse-pd.o plca.o mm.o diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 6b00d0800f23..06a151165c31 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -69,7 +69,7 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) return ret; dev = req_info.dev; - if (!req_info.phydev) { + if (!dev->phydev) { ret = -EOPNOTSUPP; goto out_dev_put; } @@ -85,12 +85,12 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto out_rtnl; - ret = ops->start_cable_test(req_info.phydev, info->extack); + ret = ops->start_cable_test(dev->phydev, info->extack); ethnl_ops_complete(dev); if (!ret) - ethnl_cable_test_started(req_info.phydev, + ethnl_cable_test_started(dev->phydev, ETHTOOL_MSG_CABLE_TEST_NTF); out_rtnl: @@ -321,7 +321,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) return ret; dev = req_info.dev; - if (!req_info.phydev) { + if (!dev->phydev) { ret = -EOPNOTSUPP; goto out_dev_put; } @@ -342,12 +342,12 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info) if (ret < 0) goto out_rtnl; - ret = ops->start_cable_test_tdr(req_info.phydev, info->extack, &cfg); + ret = ops->start_cable_test_tdr(dev->phydev, info->extack, &cfg); ethnl_ops_complete(dev); if (!ret) - ethnl_cable_test_started(req_info.phydev, + ethnl_cable_test_started(dev->phydev, ETHTOOL_MSG_CABLE_TEST_TDR_NTF); out_rtnl: diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 92b0dd8ca046..fe3553f60bf3 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,7 +4,6 @@ #include #include #include "netlink.h" -#include static struct genl_family ethtool_genl_family; @@ -21,7 +20,6 @@ const struct nla_policy ethnl_header_policy[] = { .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_BASIC), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; const struct nla_policy ethnl_header_policy_stats[] = { @@ -30,7 +28,6 @@ const struct nla_policy ethnl_header_policy_stats[] = { .len = ALTIFNAMSIZ - 1 }, [ETHTOOL_A_HEADER_FLAGS] = NLA_POLICY_MASK(NLA_U32, ETHTOOL_FLAGS_STATS), - [ETHTOOL_A_HEADER_PHY_INDEX] = NLA_POLICY_MIN(NLA_U32, 1), }; int ethnl_ops_begin(struct net_device *dev) @@ -94,7 +91,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, { struct nlattr *tb[ARRAY_SIZE(ethnl_header_policy)]; const struct nlattr *devname_attr; - struct phy_device *phydev = NULL; struct net_device *dev = NULL; u32 flags = 0; int ret; @@ -149,26 +145,6 @@ int ethnl_parse_header_dev_get(struct ethnl_req_info *req_info, return -EINVAL; } - if (dev) { - if (tb[ETHTOOL_A_HEADER_PHY_INDEX]) { - u32 phy_index = nla_get_u32(tb[ETHTOOL_A_HEADER_PHY_INDEX]); - - phydev = phy_link_topo_get_phy(&dev->link_topo, - phy_index); - if (!phydev) { - NL_SET_ERR_MSG_ATTR(extack, header, - "no phy matches phy index"); - return -EINVAL; - } - } else { - /* If we need a PHY but no phy index is specified, fallback - * to dev->phydev - */ - phydev = dev->phydev; - } - } - - req_info->phydev = phydev; req_info->dev = dev; req_info->flags = flags; return 0; @@ -1153,15 +1129,6 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, - { - .cmd = ETHTOOL_MSG_PHY_GET, - .doit = ethnl_phy_doit, - .start = ethnl_phy_start, - .dumpit = ethnl_phy_dumpit, - .done = ethnl_phy_done, - .policy = ethnl_phy_get_policy, - .maxattr = ARRAY_SIZE(ethnl_phy_get_policy) - 1, - }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 5e6a43e35a09..9a333a8d04c1 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -250,7 +250,6 @@ static inline unsigned int ethnl_reply_header_size(void) * @dev: network device the request is for (may be null) * @dev_tracker: refcount tracker for @dev reference * @flags: request flags common for all request types - * @phydev: phy_device connected to @dev this request is for (may be null) * * This is a common base for request specific structures holding data from * parsed userspace request. These always embed struct ethnl_req_info at @@ -260,7 +259,6 @@ struct ethnl_req_info { struct net_device *dev; netdevice_tracker dev_tracker; u32 flags; - struct phy_device *phydev; }; static inline void ethnl_parse_header_dev_put(struct ethnl_req_info *req_info) @@ -397,10 +395,9 @@ extern const struct ethnl_request_ops ethnl_rss_request_ops; extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops; extern const struct ethnl_request_ops ethnl_plca_status_request_ops; extern const struct ethnl_request_ops ethnl_mm_request_ops; -extern const struct ethnl_request_ops ethnl_phy_request_ops; -extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_PHY_INDEX + 1]; -extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_PHY_INDEX + 1]; +extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1]; +extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1]; extern const struct nla_policy ethnl_strset_get_policy[ETHTOOL_A_STRSET_COUNTS_ONLY + 1]; extern const struct nla_policy ethnl_linkinfo_get_policy[ETHTOOL_A_LINKINFO_HEADER + 1]; extern const struct nla_policy ethnl_linkinfo_set_policy[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL + 1]; @@ -444,7 +441,6 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; -extern const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); @@ -452,10 +448,6 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); -int ethnl_phy_start(struct netlink_callback *cb); -int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info); -int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb); -int ethnl_phy_done(struct netlink_callback *cb); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c deleted file mode 100644 index 5add2840aaeb..000000000000 --- a/net/ethtool/phy.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright 2023 Bootlin - * - */ -#include "common.h" -#include "netlink.h" - -#include -#include -#include - -struct phy_req_info { - struct ethnl_req_info base; - struct phy_device_node pdn; -}; - -#define PHY_REQINFO(__req_base) \ - container_of(__req_base, struct phy_req_info, base) - -const struct nla_policy ethnl_phy_get_policy[ETHTOOL_A_PHY_HEADER + 1] = { - [ETHTOOL_A_PHY_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), -}; - -/* Caller holds rtnl */ -static ssize_t -ethnl_phy_reply_size(const struct ethnl_req_info *req_base, - struct netlink_ext_ack *extack) -{ - struct phy_link_topology *topo; - struct phy_device_node *pdn; - struct phy_device *phydev; - unsigned long index; - size_t size; - - ASSERT_RTNL(); - - topo = &req_base->dev->link_topo; - - size = nla_total_size(0); - - xa_for_each(&topo->phys, index, pdn) { - phydev = pdn->phy; - - /* ETHTOOL_A_PHY_INDEX */ - size += nla_total_size(sizeof(u32)); - - /* ETHTOOL_A_DRVNAME */ - size += nla_total_size(strlen(phydev->drv->name) + 1); - - /* ETHTOOL_A_NAME */ - size += nla_total_size(strlen(dev_name(&phydev->mdio.dev)) + 1); - - /* ETHTOOL_A_PHY_UPSTREAM_TYPE */ - size += nla_total_size(sizeof(u8)); - - /* ETHTOOL_A_PHY_ID */ - size += nla_total_size(sizeof(u32)); - - if (phy_on_sfp(phydev)) { - const char *upstream_sfp_name = sfp_get_name(pdn->parent_sfp_bus); - - /* ETHTOOL_A_PHY_UPSTREAM_SFP_NAME */ - if (upstream_sfp_name) - size += nla_total_size(strlen(upstream_sfp_name) + 1); - - /* ETHTOOL_A_PHY_UPSTREAM_INDEX */ - size += nla_total_size(sizeof(u32)); - } - - /* ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME */ - if (phydev->sfp_bus) { - const char *sfp_name = sfp_get_name(phydev->sfp_bus); - - if (sfp_name) - size += nla_total_size(strlen(sfp_name) + 1); - } - } - - return size; -} - -static int -ethnl_phy_fill_reply(const struct ethnl_req_info *req_base, struct sk_buff *skb) -{ - struct phy_req_info *req_info = PHY_REQINFO(req_base); - struct phy_device_node *pdn = &req_info->pdn; - struct phy_device *phydev = pdn->phy; - enum phy_upstream ptype; - struct nlattr *nest; - - ptype = pdn->upstream_type; - - if (nla_put_u32(skb, ETHTOOL_A_PHY_INDEX, phydev->phyindex) || - nla_put_string(skb, ETHTOOL_A_PHY_DRVNAME, phydev->drv->name) || - nla_put_string(skb, ETHTOOL_A_PHY_NAME, dev_name(&phydev->mdio.dev)) || - nla_put_u8(skb, ETHTOOL_A_PHY_UPSTREAM_TYPE, ptype) || - nla_put_u32(skb, ETHTOOL_A_PHY_ID, phydev->phy_id)) - return -EMSGSIZE; - - if (ptype == PHY_UPSTREAM_PHY) { - struct phy_device *upstream = pdn->upstream.phydev; - const char *sfp_upstream_name; - - nest = nla_nest_start(skb, ETHTOOL_A_PHY_UPSTREAM); - if (!nest) - return -EMSGSIZE; - - /* Parent index */ - if (nla_put_u32(skb, ETHTOOL_A_PHY_UPSTREAM_INDEX, upstream->phyindex)) - return -EMSGSIZE; - - if (pdn->parent_sfp_bus) { - sfp_upstream_name = sfp_get_name(pdn->parent_sfp_bus); - if (sfp_upstream_name && nla_put_string(skb, - ETHTOOL_A_PHY_UPSTREAM_SFP_NAME, - sfp_upstream_name)) - return -EMSGSIZE; - } - - nla_nest_end(skb, nest); - } - - if (phydev->sfp_bus) { - const char *sfp_name = sfp_get_name(phydev->sfp_bus); - - if (sfp_name && - nla_put_string(skb, ETHTOOL_A_PHY_DOWNSTREAM_SFP_NAME, - sfp_name)) - return -EMSGSIZE; - } - - return 0; -} - -static int ethnl_phy_parse_request(struct ethnl_req_info *req_base, - struct nlattr **tb) -{ - struct phy_link_topology *topo = &req_base->dev->link_topo; - struct phy_req_info *req_info = PHY_REQINFO(req_base); - struct phy_device_node *pdn; - - if (!req_base->phydev) - return 0; - - pdn = xa_load(&topo->phys, req_base->phydev->phyindex); - memcpy(&req_info->pdn, pdn, sizeof(*pdn)); - - return 0; -} - -int ethnl_phy_doit(struct sk_buff *skb, struct genl_info *info) -{ - struct phy_req_info req_info = {}; - struct nlattr **tb = info->attrs; - struct sk_buff *rskb; - void *reply_payload; - int reply_len; - int ret; - - ret = ethnl_parse_header_dev_get(&req_info.base, - tb[ETHTOOL_A_PHY_HEADER], - genl_info_net(info), info->extack, - true); - if (ret < 0) - return ret; - - rtnl_lock(); - - ret = ethnl_phy_parse_request(&req_info.base, tb); - if (ret < 0) - goto err_unlock_rtnl; - - /* No PHY, return early */ - if (!req_info.pdn.phy) - goto err_unlock_rtnl; - - ret = ethnl_phy_reply_size(&req_info.base, info->extack); - if (ret < 0) - goto err_unlock_rtnl; - reply_len = ret + ethnl_reply_header_size(); - - rskb = ethnl_reply_init(reply_len, req_info.base.dev, - ETHTOOL_MSG_PHY_GET_REPLY, - ETHTOOL_A_PHY_HEADER, - info, &reply_payload); - if (!rskb) { - ret = -ENOMEM; - goto err_unlock_rtnl; - } - - ret = ethnl_phy_fill_reply(&req_info.base, rskb); - if (ret) - goto err_free_msg; - - rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info.base); - genlmsg_end(rskb, reply_payload); - - return genlmsg_reply(rskb, info); - -err_free_msg: - nlmsg_free(rskb); -err_unlock_rtnl: - rtnl_unlock(); - ethnl_parse_header_dev_put(&req_info.base); - return ret; -} - -struct ethnl_phy_dump_ctx { - struct phy_req_info *phy_req_info; -}; - -int ethnl_phy_start(struct netlink_callback *cb) -{ - const struct genl_dumpit_info *info = genl_dumpit_info(cb); - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - struct nlattr **tb = info->info.attrs; - int ret; - - BUILD_BUG_ON(sizeof(*ctx) > sizeof(cb->ctx)); - - ctx->phy_req_info = kzalloc(sizeof(*ctx->phy_req_info), GFP_KERNEL); - if (!ctx->phy_req_info) - return -ENOMEM; - - ret = ethnl_parse_header_dev_get(&ctx->phy_req_info->base, - tb[ETHTOOL_A_PHY_HEADER], - sock_net(cb->skb->sk), cb->extack, - false); - return ret; -} - -int ethnl_phy_done(struct netlink_callback *cb) -{ - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - - kfree(ctx->phy_req_info); - - return 0; -} - -static int ethnl_phy_dump_one_dev(struct sk_buff *skb, struct net_device *dev, - struct netlink_callback *cb) -{ - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - struct phy_req_info *pri = ctx->phy_req_info; - struct phy_device_node *pdn; - unsigned long index = 1; - int ret = 0; - void *ehdr; - - pri->base.dev = dev; - - xa_for_each(&dev->link_topo.phys, index, pdn) { - ehdr = ethnl_dump_put(skb, cb, - ETHTOOL_MSG_PHY_GET_REPLY); - if (!ehdr) { - ret = -EMSGSIZE; - break; - } - - ret = ethnl_fill_reply_header(skb, dev, - ETHTOOL_A_PHY_HEADER); - if (ret < 0) { - genlmsg_cancel(skb, ehdr); - break; - } - - memcpy(&pri->pdn, pdn, sizeof(*pdn)); - ret = ethnl_phy_fill_reply(&pri->base, skb); - - genlmsg_end(skb, ehdr); - } - - return ret; -} - -int ethnl_phy_dumpit(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ethnl_phy_dump_ctx *ctx = (void *)cb->ctx; - struct net *net = sock_net(skb->sk); - unsigned long ifindex = 1; - struct net_device *dev; - int ret = 0; - - rtnl_lock(); - - if (ctx->phy_req_info->base.dev) { - ret = ethnl_phy_dump_one_dev(skb, ctx->phy_req_info->base.dev, cb); - ethnl_parse_header_dev_put(&ctx->phy_req_info->base); - ctx->phy_req_info->base.dev = NULL; - } else { - for_each_netdev_dump(net, dev, ifindex) { - ret = ethnl_phy_dump_one_dev(skb, dev, cb); - if (ret) - break; - } - } - rtnl_unlock(); - - if (ret == -EMSGSIZE && skb->len) - return skb->len; - return ret; -} - diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index 2b3e419f4dc2..b1e2e3b5027f 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -61,7 +61,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, int ret; // check that the PHY device is available and connected - if (!req_base->phydev) { + if (!dev->phydev) { ret = -EOPNOTSUPP; goto out; } @@ -80,7 +80,7 @@ static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base, memset(&data->plca_cfg, 0xff, sizeof_field(struct plca_reply_data, plca_cfg)); - ret = ops->get_plca_cfg(req_base->phydev, &data->plca_cfg); + ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg); ethnl_ops_complete(dev); out: @@ -141,6 +141,7 @@ const struct nla_policy ethnl_plca_set_cfg_policy[] = { static int ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) { + struct net_device *dev = req_info->dev; const struct ethtool_phy_ops *ops; struct nlattr **tb = info->attrs; struct phy_plca_cfg plca_cfg; @@ -148,7 +149,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) int ret; // check that the PHY device is available and connected - if (!req_info->phydev) + if (!dev->phydev) return -EOPNOTSUPP; ops = ethtool_phy_ops; @@ -167,7 +168,7 @@ ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info) if (!mod) return 0; - ret = ops->set_plca_cfg(req_info->phydev, &plca_cfg, info->extack); + ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack); return ret < 0 ? ret : 1; } @@ -203,7 +204,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, int ret; // check that the PHY device is available and connected - if (!req_base->phydev) { + if (!dev->phydev) { ret = -EOPNOTSUPP; goto out; } @@ -222,7 +223,7 @@ static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base, memset(&data->plca_st, 0xff, sizeof_field(struct plca_reply_data, plca_st)); - ret = ops->get_plca_status(req_base->phydev, &data->plca_st); + ret = ops->get_plca_status(dev->phydev, &data->plca_st); ethnl_ops_complete(dev); out: return ret; diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 4a1c8d37bd3d..cc478af77111 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -31,10 +31,12 @@ const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1] = { [ETHTOOL_A_PSE_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy), }; -static int pse_get_pse_attributes(struct phy_device *phydev, +static int pse_get_pse_attributes(struct net_device *dev, struct netlink_ext_ack *extack, struct pse_reply_data *data) { + struct phy_device *phydev = dev->phydev; + if (!phydev) { NL_SET_ERR_MSG(extack, "No PHY is attached"); return -EOPNOTSUPP; @@ -62,7 +64,7 @@ static int pse_prepare_data(const struct ethnl_req_info *req_base, if (ret < 0) return ret; - ret = pse_get_pse_attributes(req_base->phydev, info->extack, data); + ret = pse_get_pse_attributes(dev, info->extack, data); ethnl_ops_complete(dev); @@ -122,6 +124,7 @@ ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info) static int ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) { + struct net_device *dev = req_info->dev; struct pse_control_config config = {}; struct nlattr **tb = info->attrs; struct phy_device *phydev; @@ -129,7 +132,7 @@ ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info) /* this values are already validated by the ethnl_pse_set_policy */ config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]); - phydev = req_info->phydev; + phydev = dev->phydev; if (!phydev) { NL_SET_ERR_MSG(info->extack, "No PHY is attached"); return -EOPNOTSUPP; diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 70c00631c51f..c678b484a079 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -233,18 +233,17 @@ static void strset_cleanup_data(struct ethnl_reply_data *reply_base) } static int strset_prepare_set(struct strset_info *info, struct net_device *dev, - struct phy_device *phydev, unsigned int id, - bool counts_only) + unsigned int id, bool counts_only) { const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops; const struct ethtool_ops *ops = dev->ethtool_ops; void *strings; int count, ret; - if (id == ETH_SS_PHY_STATS && phydev && + if (id == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_sset_count) - ret = phy_ops->get_sset_count(phydev); + ret = phy_ops->get_sset_count(dev->phydev); else if (ops->get_sset_count && ops->get_strings) ret = ops->get_sset_count(dev, id); else @@ -259,10 +258,10 @@ static int strset_prepare_set(struct strset_info *info, struct net_device *dev, strings = kcalloc(count, ETH_GSTRING_LEN, GFP_KERNEL); if (!strings) return -ENOMEM; - if (id == ETH_SS_PHY_STATS && phydev && + if (id == ETH_SS_PHY_STATS && dev->phydev && !ops->get_ethtool_phy_stats && phy_ops && phy_ops->get_strings) - phy_ops->get_strings(phydev, strings); + phy_ops->get_strings(dev->phydev, strings); else ops->get_strings(dev, id, strings); info->strings = strings; @@ -306,8 +305,8 @@ static int strset_prepare_data(const struct ethnl_req_info *req_base, !data->sets[i].per_dev) continue; - ret = strset_prepare_set(&data->sets[i], dev, req_base->phydev, - i, req_info->counts_only); + ret = strset_prepare_set(&data->sets[i], dev, i, + req_info->counts_only); if (ret < 0) goto err_ops; } -- cgit v1.3.1 From 8a6286c1804e2c7144aef3154a0357c4b496e10b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 3 Jan 2024 14:28:36 +0100 Subject: dpll: expose fractional frequency offset value to user Add a new netlink attribute to expose fractional frequency offset value for a pin. Add an op to get the value from the driver. Signed-off-by: Jiri Pirko Acked-by: Vadim Fedorenko Acked-by: Arkadiusz Kubalewski Link: https://lore.kernel.org/r/20240103132838.1501801-2-jiri@resnulli.us Signed-off-by: Jakub Kicinski --- Documentation/netlink/specs/dpll.yaml | 11 +++++++++++ drivers/dpll/dpll_netlink.c | 24 ++++++++++++++++++++++++ include/linux/dpll.h | 3 +++ include/uapi/linux/dpll.h | 1 + 4 files changed, 39 insertions(+) (limited to 'Documentation') diff --git a/Documentation/netlink/specs/dpll.yaml b/Documentation/netlink/specs/dpll.yaml index cf8abe1c0550..b14aed18065f 100644 --- a/Documentation/netlink/specs/dpll.yaml +++ b/Documentation/netlink/specs/dpll.yaml @@ -296,6 +296,16 @@ attribute-sets: - name: phase-offset type: s64 + - + name: fractional-frequency-offset + type: sint + doc: | + The FFO (Fractional Frequency Offset) between the RX and TX + symbol rate on the media associated with the pin: + (rx_frequency-tx_frequency)/rx_frequency + Value is in PPM (parts per million). + This may be implemented for example for pin of type + PIN_TYPE_SYNCE_ETH_PORT. - name: pin-parent-device subset-of: pin @@ -460,6 +470,7 @@ operations: - phase-adjust-min - phase-adjust-max - phase-adjust + - fractional-frequency-offset dump: pre: dpll-lock-dumpit diff --git a/drivers/dpll/dpll_netlink.c b/drivers/dpll/dpll_netlink.c index 21c627e9401a..3370dbddb86b 100644 --- a/drivers/dpll/dpll_netlink.c +++ b/drivers/dpll/dpll_netlink.c @@ -263,6 +263,27 @@ dpll_msg_add_phase_offset(struct sk_buff *msg, struct dpll_pin *pin, return 0; } +static int dpll_msg_add_ffo(struct sk_buff *msg, struct dpll_pin *pin, + struct dpll_pin_ref *ref, + struct netlink_ext_ack *extack) +{ + const struct dpll_pin_ops *ops = dpll_pin_ops(ref); + struct dpll_device *dpll = ref->dpll; + s64 ffo; + int ret; + + if (!ops->ffo_get) + return 0; + ret = ops->ffo_get(pin, dpll_pin_on_dpll_priv(dpll, pin), + dpll, dpll_priv(dpll), &ffo, extack); + if (ret) { + if (ret == -ENODATA) + return 0; + return ret; + } + return nla_put_sint(msg, DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET, ffo); +} + static int dpll_msg_add_pin_freq(struct sk_buff *msg, struct dpll_pin *pin, struct dpll_pin_ref *ref, struct netlink_ext_ack *extack) @@ -440,6 +461,9 @@ dpll_cmd_pin_get_one(struct sk_buff *msg, struct dpll_pin *pin, prop->phase_range.max)) return -EMSGSIZE; ret = dpll_msg_add_pin_phase_adjust(msg, pin, ref, extack); + if (ret) + return ret; + ret = dpll_msg_add_ffo(msg, pin, ref, extack); if (ret) return ret; if (xa_empty(&pin->parent_refs)) diff --git a/include/linux/dpll.h b/include/linux/dpll.h index b1a5f9ca8ee5..9cf896ea1d41 100644 --- a/include/linux/dpll.h +++ b/include/linux/dpll.h @@ -77,6 +77,9 @@ struct dpll_pin_ops { const struct dpll_device *dpll, void *dpll_priv, const s32 phase_adjust, struct netlink_ext_ack *extack); + int (*ffo_get)(const struct dpll_pin *pin, void *pin_priv, + const struct dpll_device *dpll, void *dpll_priv, + s64 *ffo, struct netlink_ext_ack *extack); }; struct dpll_pin_frequency { diff --git a/include/uapi/linux/dpll.h b/include/uapi/linux/dpll.h index 715a491d2727..b4e947f9bfbc 100644 --- a/include/uapi/linux/dpll.h +++ b/include/uapi/linux/dpll.h @@ -179,6 +179,7 @@ enum dpll_a_pin { DPLL_A_PIN_PHASE_ADJUST_MAX, DPLL_A_PIN_PHASE_ADJUST, DPLL_A_PIN_PHASE_OFFSET, + DPLL_A_PIN_FRACTIONAL_FREQUENCY_OFFSET, __DPLL_A_PIN_MAX, DPLL_A_PIN_MAX = (__DPLL_A_PIN_MAX - 1) -- cgit v1.3.1 From 67d47c8ada0f8795bfcdb85cc8f2ad3ce556674b Mon Sep 17 00:00:00 2001 From: Swee Leong Ching Date: Fri, 5 Jan 2024 15:09:22 +0800 Subject: dt-bindings: net: snps,dwmac: per channel irq Add dt-bindings for per channel irq. Signed-off-by: Rohan G Thomas Signed-off-by: Swee Leong Ching Signed-off-by: David S. Miller --- .../devicetree/bindings/net/snps,dwmac.yaml | 24 +++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 5c2769dc689a..e72dded824f4 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -103,17 +103,27 @@ properties: interrupts: minItems: 1 - items: - - description: Combined signal for various interrupt events - - description: The interrupt to manage the remote wake-up packet detection - - description: The interrupt that occurs when Rx exits the LPI state + maxItems: 19 interrupt-names: minItems: 1 + maxItems: 19 items: - - const: macirq - - enum: [eth_wake_irq, eth_lpi] - - const: eth_lpi + oneOf: + - description: Combined signal for various interrupt events + const: macirq + - description: The interrupt to manage the remote wake-up packet detection + const: eth_wake_irq + - description: The interrupt that occurs when Rx exits the LPI state + const: eth_lpi + - description: DMA Tx per-channel interrupt + pattern: '^dma_tx[0-7]?$' + - description: DMA Rx per-channel interrupt + pattern: '^dma_rx[0-7]?$' + + allOf: + - contains: + const: macirq clocks: minItems: 1 -- cgit v1.3.1 From e9ee910218ffd420454b52a052d6f1087354905b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 7 Jan 2024 17:11:38 -0800 Subject: Revert "net: stmmac: Enable Per DMA Channel interrupt" Revert "net: stmmac: Use interrupt mode INTM=1 for per channel irq" This reverts commit 36af9f25ddfd311da82628f194c794786467cb12. Revert "net: stmmac: Add support for TX/RX channel interrupt" This reverts commit 9072e03d32088137a435ddf3aa95fd6e038d69d8. Revert "net: stmmac: Make MSI interrupt routine generic" This reverts commit 477bd4beb93bf9ace9bda71f1437b191befa9cf4. Revert "dt-bindings: net: snps,dwmac: per channel irq" This reverts commit 67d47c8ada0f8795bfcdb85cc8f2ad3ce556674b. Device tree bindings need to be reviewed. Link: https://lore.kernel.org/all/2df9fe3e-7971-4aa2-89a9-0e085b3b00d7@linaro.org/ Signed-off-by: Jakub Kicinski --- .../devicetree/bindings/net/snps,dwmac.yaml | 24 +++++----------- drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 4 +-- .../net/ethernet/stmicro/stmmac/dwmac-socfpga.c | 3 -- drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c | 2 +- drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h | 3 -- drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c | 32 +++++++++------------- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 30 ++++++++++---------- .../net/ethernet/stmicro/stmmac/stmmac_platform.c | 28 ------------------- include/linux/stmmac.h | 4 +-- 9 files changed, 40 insertions(+), 90 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index e72dded824f4..5c2769dc689a 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -103,27 +103,17 @@ properties: interrupts: minItems: 1 - maxItems: 19 + items: + - description: Combined signal for various interrupt events + - description: The interrupt to manage the remote wake-up packet detection + - description: The interrupt that occurs when Rx exits the LPI state interrupt-names: minItems: 1 - maxItems: 19 items: - oneOf: - - description: Combined signal for various interrupt events - const: macirq - - description: The interrupt to manage the remote wake-up packet detection - const: eth_wake_irq - - description: The interrupt that occurs when Rx exits the LPI state - const: eth_lpi - - description: DMA Tx per-channel interrupt - pattern: '^dma_tx[0-7]?$' - - description: DMA Rx per-channel interrupt - pattern: '^dma_rx[0-7]?$' - - allOf: - - contains: - const: macirq + - const: macirq + - enum: [eth_wake_irq, eth_lpi] + - const: eth_lpi clocks: minItems: 1 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index f0ec69af96c9..60283543ffc8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -952,7 +952,7 @@ static int stmmac_config_single_msi(struct pci_dev *pdev, res->irq = pci_irq_vector(pdev, 0); res->wol_irq = res->irq; - plat->flags &= ~STMMAC_FLAG_MULTI_IRQ_EN; + plat->flags &= ~STMMAC_FLAG_MULTI_MSI_EN; dev_info(&pdev->dev, "%s: Single IRQ enablement successful\n", __func__); @@ -1004,7 +1004,7 @@ static int stmmac_config_multi_msi(struct pci_dev *pdev, if (plat->msi_sfty_ue_vec < STMMAC_MSI_VEC_MAX) res->sfty_ue_irq = pci_irq_vector(pdev, plat->msi_sfty_ue_vec); - plat->flags |= STMMAC_FLAG_MULTI_IRQ_EN; + plat->flags |= STMMAC_FLAG_MULTI_MSI_EN; dev_info(&pdev->dev, "%s: multi MSI enablement successful\n", __func__); return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index cf43fb3c6cc5..ba2ce776bd4d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -427,9 +427,6 @@ static int socfpga_dwmac_probe(struct platform_device *pdev) plat_dat->bsp_priv = dwmac; plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed; - if (stmmac_res.rx_irq[0] > 0 && stmmac_res.tx_irq[0] > 0) - plat_dat->flags |= STMMAC_FLAG_MULTI_IRQ_EN; - ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); if (ret) return ret; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 5f649106ffcd..84d3a8551b03 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -175,7 +175,7 @@ static void dwmac4_dma_init(void __iomem *ioaddr, value = readl(ioaddr + DMA_BUS_MODE); - if (dma_cfg->multi_irq_en) { + if (dma_cfg->multi_msi_en) { value &= ~DMA_BUS_MODE_INTM_MASK; value |= (DMA_BUS_MODE_INTM_MODE1 << DMA_BUS_MODE_INTM_SHIFT); } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h index 04bf731cb7ea..207ff1799f2c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2.h @@ -346,9 +346,6 @@ /* DMA Registers */ #define XGMAC_DMA_MODE 0x00003000 #define XGMAC_SWR BIT(0) -#define XGMAC_DMA_MODE_INTM_MASK GENMASK(13, 12) -#define XGMAC_DMA_MODE_INTM_SHIFT 12 -#define XGMAC_DMA_MODE_INTM_MODE1 0x1 #define XGMAC_DMA_SYSBUS_MODE 0x00003004 #define XGMAC_WR_OSR_LMT GENMASK(29, 24) #define XGMAC_WR_OSR_LMT_SHIFT 24 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index dcb9f094415d..3cde695fec91 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -31,13 +31,6 @@ static void dwxgmac2_dma_init(void __iomem *ioaddr, value |= XGMAC_EAME; writel(value, ioaddr + XGMAC_DMA_SYSBUS_MODE); - - if (dma_cfg->multi_irq_en) { - value = readl(ioaddr + XGMAC_DMA_MODE); - value &= ~XGMAC_DMA_MODE_INTM_MASK; - value |= (XGMAC_DMA_MODE_INTM_MODE1 << XGMAC_DMA_MODE_INTM_SHIFT); - writel(value, ioaddr + XGMAC_DMA_MODE); - } } static void dwxgmac2_dma_init_chan(struct stmmac_priv *priv, @@ -372,18 +365,19 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, } /* TX/RX NORMAL interrupts */ - if (likely(intr_status & XGMAC_RI)) { - u64_stats_update_begin(&rxq_stats->syncp); - rxq_stats->rx_normal_irq_n++; - u64_stats_update_end(&rxq_stats->syncp); - ret |= handle_rx; - } - - if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { - u64_stats_update_begin(&txq_stats->syncp); - txq_stats->tx_normal_irq_n++; - u64_stats_update_end(&txq_stats->syncp); - ret |= handle_tx; + if (likely(intr_status & XGMAC_NIS)) { + if (likely(intr_status & XGMAC_RI)) { + u64_stats_update_begin(&rxq_stats->syncp); + rxq_stats->rx_normal_irq_n++; + u64_stats_update_end(&rxq_stats->syncp); + ret |= handle_rx; + } + if (likely(intr_status & (XGMAC_TI | XGMAC_TBU))) { + u64_stats_update_begin(&txq_stats->syncp); + txq_stats->tx_normal_irq_n++; + u64_stats_update_end(&txq_stats->syncp); + ret |= handle_tx; + } } /* Clear interrupts */ diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index 57873b879b33..47de466e432c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -129,8 +129,8 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id); /* For MSI interrupts handling */ static irqreturn_t stmmac_mac_interrupt(int irq, void *dev_id); static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id); -static irqreturn_t stmmac_dma_tx_interrupt(int irq, void *data); -static irqreturn_t stmmac_dma_rx_interrupt(int irq, void *data); +static irqreturn_t stmmac_msi_intr_tx(int irq, void *data); +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data); static void stmmac_reset_rx_queue(struct stmmac_priv *priv, u32 queue); static void stmmac_reset_tx_queue(struct stmmac_priv *priv, u32 queue); static void stmmac_reset_queues_param(struct stmmac_priv *priv); @@ -3602,7 +3602,7 @@ static void stmmac_free_irq(struct net_device *dev, } } -static int stmmac_request_irq_multi(struct net_device *dev) +static int stmmac_request_irq_multi_msi(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); enum request_irq_err irq_err; @@ -3697,7 +3697,7 @@ static int stmmac_request_irq_multi(struct net_device *dev) } } - /* Request Rx irq */ + /* Request Rx MSI irq */ for (i = 0; i < priv->plat->rx_queues_to_use; i++) { if (i >= MTL_MAX_RX_QUEUES) break; @@ -3707,11 +3707,11 @@ static int stmmac_request_irq_multi(struct net_device *dev) int_name = priv->int_name_rx_irq[i]; sprintf(int_name, "%s:%s-%d", dev->name, "rx", i); ret = request_irq(priv->rx_irq[i], - stmmac_dma_rx_interrupt, + stmmac_msi_intr_rx, 0, int_name, &priv->dma_conf.rx_queue[i]); if (unlikely(ret < 0)) { netdev_err(priv->dev, - "%s: alloc rx-%d dma rx_irq %d (error: %d)\n", + "%s: alloc rx-%d MSI %d (error: %d)\n", __func__, i, priv->rx_irq[i], ret); irq_err = REQ_IRQ_ERR_RX; irq_idx = i; @@ -3722,7 +3722,7 @@ static int stmmac_request_irq_multi(struct net_device *dev) irq_set_affinity_hint(priv->rx_irq[i], &cpu_mask); } - /* Request Tx irq */ + /* Request Tx MSI irq */ for (i = 0; i < priv->plat->tx_queues_to_use; i++) { if (i >= MTL_MAX_TX_QUEUES) break; @@ -3732,11 +3732,11 @@ static int stmmac_request_irq_multi(struct net_device *dev) int_name = priv->int_name_tx_irq[i]; sprintf(int_name, "%s:%s-%d", dev->name, "tx", i); ret = request_irq(priv->tx_irq[i], - stmmac_dma_tx_interrupt, + stmmac_msi_intr_tx, 0, int_name, &priv->dma_conf.tx_queue[i]); if (unlikely(ret < 0)) { netdev_err(priv->dev, - "%s: alloc tx-%d dma tx_irq %d (error: %d)\n", + "%s: alloc tx-%d MSI %d (error: %d)\n", __func__, i, priv->tx_irq[i], ret); irq_err = REQ_IRQ_ERR_TX; irq_idx = i; @@ -3811,8 +3811,8 @@ static int stmmac_request_irq(struct net_device *dev) int ret; /* Request the IRQ lines */ - if (priv->plat->flags & STMMAC_FLAG_MULTI_IRQ_EN) - ret = stmmac_request_irq_multi(dev); + if (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN) + ret = stmmac_request_irq_multi_msi(dev); else ret = stmmac_request_irq_single(dev); @@ -6075,7 +6075,7 @@ static irqreturn_t stmmac_safety_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static irqreturn_t stmmac_dma_tx_interrupt(int irq, void *data) +static irqreturn_t stmmac_msi_intr_tx(int irq, void *data) { struct stmmac_tx_queue *tx_q = (struct stmmac_tx_queue *)data; struct stmmac_dma_conf *dma_conf; @@ -6107,7 +6107,7 @@ static irqreturn_t stmmac_dma_tx_interrupt(int irq, void *data) return IRQ_HANDLED; } -static irqreturn_t stmmac_dma_rx_interrupt(int irq, void *data) +static irqreturn_t stmmac_msi_intr_rx(int irq, void *data) { struct stmmac_rx_queue *rx_q = (struct stmmac_rx_queue *)data; struct stmmac_dma_conf *dma_conf; @@ -7456,8 +7456,8 @@ int stmmac_dvr_probe(struct device *device, priv->plat = plat_dat; priv->ioaddr = res->addr; priv->dev->base_addr = (unsigned long)res->addr; - priv->plat->dma_cfg->multi_irq_en = - (priv->plat->flags & STMMAC_FLAG_MULTI_IRQ_EN); + priv->plat->dma_cfg->multi_msi_en = + (priv->plat->flags & STMMAC_FLAG_MULTI_MSI_EN); priv->dev->irq = res->irq; priv->wol_irq = res->wol_irq; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index ae6859153e98..70eadc83ca68 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -710,10 +710,6 @@ EXPORT_SYMBOL_GPL(devm_stmmac_probe_config_dt); int stmmac_get_platform_resources(struct platform_device *pdev, struct stmmac_resources *stmmac_res) { - char irq_name[9]; - int i; - int irq; - memset(stmmac_res, 0, sizeof(*stmmac_res)); /* Get IRQ information early to have an ability to ask for deferred @@ -747,30 +743,6 @@ int stmmac_get_platform_resources(struct platform_device *pdev, dev_info(&pdev->dev, "IRQ eth_lpi not found\n"); } - /* For RX Channel */ - for (i = 0; i < MTL_MAX_RX_QUEUES; i++) { - snprintf(irq_name, sizeof(irq_name), "dma_rx%i", i); - irq = platform_get_irq_byname_optional(pdev, irq_name); - if (irq == -EPROBE_DEFER) - return irq; - else if (irq < 0) - break; - - stmmac_res->rx_irq[i] = irq; - } - - /* For TX Channel */ - for (i = 0; i < MTL_MAX_TX_QUEUES; i++) { - snprintf(irq_name, sizeof(irq_name), "dma_tx%i", i); - irq = platform_get_irq_byname_optional(pdev, irq_name); - if (irq == -EPROBE_DEFER) - return irq; - else if (irq < 0) - break; - - stmmac_res->tx_irq[i] = irq; - } - stmmac_res->addr = devm_platform_ioremap_resource(pdev, 0); return PTR_ERR_OR_ZERO(stmmac_res->addr); diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index b950e6f9761d..dee5ad6e48c5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -98,7 +98,7 @@ struct stmmac_dma_cfg { int mixed_burst; bool aal; bool eame; - bool multi_irq_en; + bool multi_msi_en; bool dche; }; @@ -215,7 +215,7 @@ struct dwmac4_addrs { #define STMMAC_FLAG_TSO_EN BIT(4) #define STMMAC_FLAG_SERDES_UP_AFTER_PHY_LINKUP BIT(5) #define STMMAC_FLAG_VLAN_FAIL_Q_EN BIT(6) -#define STMMAC_FLAG_MULTI_IRQ_EN BIT(7) +#define STMMAC_FLAG_MULTI_MSI_EN BIT(7) #define STMMAC_FLAG_EXT_SNAPSHOT_EN BIT(8) #define STMMAC_FLAG_INT_SNAPSHOT_EN BIT(9) #define STMMAC_FLAG_RX_CLK_RUNS_IN_LPI BIT(10) -- cgit v1.3.1