diff options
Diffstat (limited to 'include/net')
67 files changed, 2964 insertions, 285 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h index 61f2ceb3939e..2a6f443f0ef6 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -67,6 +67,7 @@ struct tc_action { #define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1)) #define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2)) #define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3)) +#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4)) /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. @@ -100,11 +101,6 @@ static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) return hw_stats; } -#ifdef CONFIG_NET_CLS_ACT - -#define ACT_P_CREATED 1 -#define ACT_P_DELETED 1 - typedef void (*tc_action_priv_destructor)(void *priv); struct tc_action_ops { @@ -139,6 +135,11 @@ struct tc_action_ops { struct netlink_ext_ack *extack); }; +#ifdef CONFIG_NET_CLS_ACT + +#define ACT_P_CREATED 1 +#define ACT_P_DELETED 1 + struct tc_action_net { struct tcf_idrinfo *idrinfo; const struct tc_action_ops *ops; diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index b69ca695935c..d5a5ae926380 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -66,10 +66,10 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64); bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *); u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *); -bool rxrpc_kernel_call_is_complete(struct rxrpc_call *); void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *, unsigned long); int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val); +int rxrpc_sock_set_security_keyring(struct sock *, struct key *); #endif /* _NET_RXRPC_H */ diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 684f1cd28730..8d773b042c85 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -274,6 +274,26 @@ enum { * during the hdev->setup vendor callback. */ HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN, + + /* + * When this quirk is set, the HCI_OP_LE_SET_EXT_SCAN_ENABLE command is + * disabled. This is required for some Broadcom controllers which + * erroneously claim to support extended scanning. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_EXT_SCAN, + + /* + * When this quirk is set, the HCI_OP_GET_MWS_TRANSPORT_CONFIG command is + * disabled. This is required for some Broadcom controllers which + * erroneously claim to support MWS Transport Layer Configuration. + * + * This quirk can be set before hci_register_dev is called or + * during the hdev->setup vendor callback. + */ + HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, }; /* HCI device flags */ @@ -2590,6 +2610,7 @@ struct hci_ev_le_conn_complete { #define LE_EXT_ADV_DIRECT_IND 0x0004 #define LE_EXT_ADV_SCAN_RSP 0x0008 #define LE_EXT_ADV_LEGACY_PDU 0x0010 +#define LE_EXT_ADV_EVT_TYPE_MASK 0x007f #define ADDR_LE_DEV_PUBLIC 0x00 #define ADDR_LE_DEV_RANDOM 0x01 diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index c54bc71254af..7254edfba4c9 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -659,6 +659,7 @@ struct hci_dev { int (*set_diag)(struct hci_dev *hdev, bool enable); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); void (*cmd_timeout)(struct hci_dev *hdev); + void (*reset)(struct hci_dev *hdev); bool (*wakeup)(struct hci_dev *hdev); int (*set_quality_report)(struct hci_dev *hdev, bool enable); int (*get_data_path_id)(struct hci_dev *hdev, __u8 *data_path); @@ -1689,7 +1690,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn); /* Use ext scanning if set ext scan param and ext scan enable is supported */ #define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \ - ((dev)->commands[37] & 0x40)) + ((dev)->commands[37] & 0x40) && \ + !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks)) + /* Use ext create connection if command is supported */ #define use_ext_conn(dev) ((dev)->commands[37] & 0x80) @@ -1717,6 +1720,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn); ((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL) #define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER) +#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \ + (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks))) + /* ----- HCI protocols ----- */ #define HCI_PROTO_DEFER 0x01 diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h index 191c36afa1f4..9dc082b2d543 100644 --- a/include/net/bond_alb.h +++ b/include/net/bond_alb.h @@ -156,8 +156,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave); void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave); void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link); void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave); -int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); -int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev); +netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev); struct slave *bond_xmit_alb_slave_get(struct bonding *bond, struct sk_buff *skb); struct slave *bond_xmit_tlb_slave_get(struct bonding *bond, diff --git a/include/net/bonding.h b/include/net/bonding.h index e999f851738b..ea36ab7f9e72 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -92,8 +92,6 @@ #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX) - #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; @@ -280,8 +278,6 @@ struct bond_vlan_tag { unsigned short vlan_id; }; -bool bond_sk_check(struct bonding *bond); - /** * Returns NULL if the net_device does not belong to any of the bond's slaves * diff --git a/include/net/cfg80211-wext.h b/include/net/cfg80211-wext.h index ad77caf2ffde..0ee36d97e068 100644 --- a/include/net/cfg80211-wext.h +++ b/include/net/cfg80211-wext.h @@ -19,34 +19,34 @@ */ int cfg80211_wext_giwname(struct net_device *dev, struct iw_request_info *info, - char *name, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info, - u32 *mode, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info, - u32 *mode, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_siwscan(struct net_device *dev, struct iw_request_info *info, union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwscan(struct net_device *dev, struct iw_request_info *info, - struct iw_point *data, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwrange(struct net_device *dev, struct iw_request_info *info, - struct iw_point *data, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_siwrts(struct net_device *dev, struct iw_request_info *info, - struct iw_param *rts, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwrts(struct net_device *dev, struct iw_request_info *info, - struct iw_param *rts, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_siwfrag(struct net_device *dev, struct iw_request_info *info, - struct iw_param *frag, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwfrag(struct net_device *dev, struct iw_request_info *info, - struct iw_param *frag, char *extra); + union iwreq_data *wrqu, char *extra); int cfg80211_wext_giwretry(struct net_device *dev, struct iw_request_info *info, - struct iw_param *retry, char *extra); + union iwreq_data *wrqu, char *extra); #endif /* __NET_CFG80211_WEXT_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e09ff87146c1..03d4f4deadae 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2105,6 +2105,7 @@ struct mpath_info { * * Used to change BSS parameters (mainly for AP mode). * + * @link_id: link_id or -1 for non-MLD * @use_cts_prot: Whether to use CTS protection * (0 = no, 1 = yes, -1 = do not change) * @use_short_preamble: Whether the use of short preambles is allowed @@ -2122,6 +2123,7 @@ struct mpath_info { * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { + int link_id; int use_cts_prot; int use_short_preamble; int use_short_slot_time; @@ -4740,7 +4742,7 @@ struct ieee80211_iface_limit { * * struct ieee80211_iface_limit limits1[] = { * { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), }, - * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, }, + * { .max = 1, .types = BIT(NL80211_IFTYPE_AP), }, * }; * struct ieee80211_iface_combination combination1 = { * .limits = limits1, @@ -6933,6 +6935,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr); * @ap_mld_addr: AP MLD address (in case of MLO) * @links: per-link information indexed by link ID, use links[0] for * non-MLO connections + * @links.status: Set this (along with a BSS pointer) for links that + * were rejected by the AP. */ struct cfg80211_rx_assoc_resp { const u8 *buf; @@ -6944,6 +6948,7 @@ struct cfg80211_rx_assoc_resp { struct { const u8 *addr; struct cfg80211_bss *bss; + u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; @@ -7454,6 +7459,9 @@ struct cfg80211_fils_resp_params { * if the bss is expired during the connection, esp. for those drivers * implementing connect op. Only one parameter among @bssid and @bss needs * to be specified. + * @links.status: per-link status code, to report a status code that's not + * %WLAN_STATUS_SUCCESS for a given link, it must also be in the + * @valid_links bitmap and may have a BSS pointer (which is then released) */ struct cfg80211_connect_resp_params { int status; @@ -7470,6 +7478,7 @@ struct cfg80211_connect_resp_params { const u8 *addr; const u8 *bssid; struct cfg80211_bss *bss; + u16 status; } links[IEEE80211_MLD_MAX_NUM_LINKS]; }; @@ -7674,6 +7683,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * * @dev: network device * @bssid: the BSSID of the AP + * @td_bitmap: transition disable policy + * @td_bitmap_len: Length of transition disable policy * @gfp: allocation flags * * This function should be called by a driver that supports 4 way handshake @@ -7684,7 +7695,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, * indicate the 802.11 association. */ void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, - gfp_t gfp); + const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp); /** * cfg80211_disconnected - notify cfg80211 that connection was dropped diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index d8d8719315fd..d09c393d229f 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -11,7 +11,7 @@ #include <linux/ieee802154.h> #include <linux/netdevice.h> -#include <linux/mutex.h> +#include <linux/spinlock.h> #include <linux/bug.h> #include <net/nl802154.h> @@ -166,11 +166,14 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b) * level setting. * @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode * setting. + * @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was + * temporarily stopped. */ enum wpan_phy_flags { WPAN_PHY_FLAG_TXPOWER = BIT(1), WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2), WPAN_PHY_FLAG_CCA_MODE = BIT(3), + WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4), }; struct wpan_phy { @@ -182,7 +185,7 @@ struct wpan_phy { */ const void *privid; - u32 flags; + unsigned long flags; /* * This is a PIB according to 802.15.4-2011. @@ -214,6 +217,17 @@ struct wpan_phy { /* the network namespace this phy lives in currently */ possible_net_t _net; + /* Transmission monitoring and control */ + spinlock_t queue_lock; + atomic_t ongoing_txs; + atomic_t hold_txs; + wait_queue_head_t sync_txq; + + /* Current filtering level on reception. + * Only allowed to be changed if phy is not operational. + */ + enum ieee802154_filtering_level filtering; + char priv[] __aligned(NETDEV_ALIGN); }; @@ -246,6 +260,24 @@ struct ieee802154_addr { }; }; +/** + * struct ieee802154_coord_desc - Coordinator descriptor + * @addr: PAN ID and coordinator address + * @page: page this coordinator is using + * @channel: channel this coordinator is using + * @superframe_spec: SuperFrame specification as received + * @link_quality: link quality indicator at which the beacon was received + * @gts_permit: the coordinator accepts GTS requests + */ +struct ieee802154_coord_desc { + struct ieee802154_addr addr; + u8 page; + u8 channel; + u16 superframe_spec; + u8 link_quality; + bool gts_permit; +}; + struct ieee802154_llsec_key_id { u8 mode; u8 id; @@ -365,8 +397,6 @@ struct wpan_dev { bool lbt; - bool promiscuous_mode; - /* fallback for acknowledgment bit setting */ bool ackreq; }; diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 2b2d86fb3131..8841ab6c2de7 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -109,6 +109,10 @@ struct dcbnl_rtnl_ops { /* buffer settings */ int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *); int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *); + + /* apptrust */ + int (*dcbnl_setapptrust)(struct net_device *, u8 *, int); + int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *); }; #endif /* __NET_DCBNL_H__ */ diff --git a/include/net/devlink.h b/include/net/devlink.h index ba6b8b094943..6a2e4f21779f 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -114,6 +114,9 @@ struct devlink_rate { refcount_t refcnt; }; }; + + u32 tx_priority; + u32 tx_weight; }; struct devlink_port { @@ -121,12 +124,21 @@ struct devlink_port { struct list_head region_list; struct devlink *devlink; unsigned int index; - spinlock_t type_lock; /* Protects type and type_dev - * pointer consistency. + spinlock_t type_lock; /* Protects type and type_eth/ib + * structures consistency. */ enum devlink_port_type type; enum devlink_port_type desired_type; - void *type_dev; + union { + struct { + struct net_device *netdev; + int ifindex; + char ifname[IFNAMSIZ]; + } type_eth; + struct { + struct ib_device *ibdev; + } type_ib; + }; struct devlink_port_attrs attrs; u8 attrs_set:1, switch_port:1, @@ -609,6 +621,8 @@ enum devlink_param_generic_id { #define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce" /* Firmware bundle identifier */ #define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id" +/* Bootloader */ +#define DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER "fw.bootloader" /** * struct devlink_flash_update_params - Flash Update parameters @@ -638,6 +652,10 @@ struct devlink_info_req; * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_region_ops { @@ -647,6 +665,10 @@ struct devlink_region_ops { const struct devlink_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink *devlink, + const struct devlink_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -658,6 +680,10 @@ struct devlink_region_ops { * the data variable must be updated to point to the snapshot data. * The function will be called while the devlink instance lock is * held. + * @read: callback to directly read a portion of the region. On success, + * the data pointer will be updated with the contents of the + * requested portion of the region. The function will be called + * while the devlink instance lock is held. * @priv: Pointer to driver private data for the region operation */ struct devlink_port_region_ops { @@ -667,6 +693,10 @@ struct devlink_port_region_ops { const struct devlink_port_region_ops *ops, struct netlink_ext_ack *extack, u8 **data); + int (*read)(struct devlink_port *port, + const struct devlink_port_region_ops *ops, + struct netlink_ext_ack *extack, + u64 offset, u32 size, u8 *data); void *priv; }; @@ -885,6 +915,8 @@ enum devlink_trap_generic_id { DEVLINK_TRAP_GENERIC_ID_ESP_PARSING, DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP, DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER, + DEVLINK_TRAP_GENERIC_ID_EAPOL, + DEVLINK_TRAP_GENERIC_ID_LOCKED_PORT, /* Add new generic trap IDs above */ __DEVLINK_TRAP_GENERIC_ID_MAX, @@ -921,6 +953,7 @@ enum devlink_trap_group_generic_id { DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE, DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP, DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS, + DEVLINK_TRAP_GROUP_GENERIC_ID_EAPOL, /* Add new generic trap group IDs above */ __DEVLINK_TRAP_GROUP_GENERIC_ID_MAX, @@ -1112,6 +1145,10 @@ enum devlink_trap_group_generic_id { "blackhole_nexthop" #define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \ "dmac_filter" +#define DEVLINK_TRAP_GENERIC_NAME_EAPOL \ + "eapol" +#define DEVLINK_TRAP_GENERIC_NAME_LOCKED_PORT \ + "locked_port" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \ "l2_drops" @@ -1165,6 +1202,8 @@ enum devlink_trap_group_generic_id { "acl_trap" #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \ "parser_error_drops" +#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EAPOL \ + "eapol" #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \ _metadata_cap) \ @@ -1415,6 +1454,45 @@ struct devlink_ops { const u8 *hw_addr, int hw_addr_len, struct netlink_ext_ack *extack); /** + * @port_fn_roce_get: Port function's roce get function. + * + * Query RoCE state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_roce_set: Port function's roce set function. + * + * Enable/Disable the RoCE state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function RoCE handling is not supported. + */ + int (*port_fn_roce_set)(struct devlink_port *devlink_port, + bool enable, struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_get: Port function's migratable get function. + * + * Query migratable state of a function managed by the devlink port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_get)(struct devlink_port *devlink_port, + bool *is_enable, + struct netlink_ext_ack *extack); + /** + * @port_fn_migratable_set: Port function's migratable set function. + * + * Enable/Disable migratable state of a function managed by the devlink + * port. + * Return -EOPNOTSUPP if port function migratable handling is not + * supported. + */ + int (*port_fn_migratable_set)(struct devlink_port *devlink_port, + bool enable, + struct netlink_ext_ack *extack); + /** * port_new() - Add a new port function of a specified flavor * @devlink: Devlink instance * @attrs: attributes of the new port @@ -1493,10 +1571,18 @@ struct devlink_ops { u64 tx_share, struct netlink_ext_ack *extack); int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack); + int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack); int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_share, struct netlink_ext_ack *extack); int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv, u64 tx_max, struct netlink_ext_ack *extack); + int (*rate_node_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_priority, struct netlink_ext_ack *extack); + int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv, + u32 tx_weight, struct netlink_ext_ack *extack); int (*rate_node_new)(struct devlink_rate *rate_node, void **priv, struct netlink_ext_ack *extack); int (*rate_node_del)(struct devlink_rate *rate_node, void *priv, @@ -1575,8 +1661,7 @@ int devlink_port_register(struct devlink *devlink, unsigned int port_index); void devl_port_unregister(struct devlink_port *devlink_port); void devlink_port_unregister(struct devlink_port *devlink_port); -void devlink_port_type_eth_set(struct devlink_port *devlink_port, - struct net_device *netdev); +void devlink_port_type_eth_set(struct devlink_port *devlink_port); void devlink_port_type_ib_set(struct devlink_port *devlink_port, struct ib_device *ibdev); void devlink_port_type_clear(struct devlink_port *devlink_port); @@ -1589,7 +1674,12 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 controller, u16 pf, u32 sf, bool external); -int devl_rate_leaf_create(struct devlink_port *port, void *priv); +struct devlink_rate * +devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name, + struct devlink_rate *parent); +int +devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv, + struct devlink_rate *parent); void devl_rate_leaf_destroy(struct devlink_port *devlink_port); void devl_rate_nodes_destroy(struct devlink *devlink); void devlink_port_linecard_set(struct devlink_port *devlink_port, @@ -1713,8 +1803,6 @@ int devlink_region_snapshot_create(struct devlink_region *region, u8 *data, u32 snapshot_id); int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn); -int devlink_info_driver_name_put(struct devlink_info_req *req, - const char *name); int devlink_info_board_serial_number_put(struct devlink_info_req *req, const char *bsn); @@ -1865,6 +1953,9 @@ int devlink_compat_phys_port_name_get(struct net_device *dev, int devlink_compat_switch_id_get(struct net_device *dev, struct netdev_phys_item_id *ppid); +int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port); +size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port); + #else static inline struct devlink *devlink_try_get(struct devlink *devlink) @@ -1901,6 +1992,17 @@ devlink_compat_switch_id_get(struct net_device *dev, return -EOPNOTSUPP; } +static inline int +devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port) +{ + return 0; +} + +static inline size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) +{ + return 0; +} + #endif #endif /* _NET_DEVLINK_H_ */ diff --git a/include/net/dropreason.h b/include/net/dropreason.h index c1cbcdbaf149..70539288f995 100644 --- a/include/net/dropreason.h +++ b/include/net/dropreason.h @@ -68,6 +68,9 @@ FN(IP_INADDRERRORS) \ FN(IP_INNOROUTES) \ FN(PKT_TOO_BIG) \ + FN(DUP_FRAG) \ + FN(FRAG_REASM_TIMEOUT) \ + FN(FRAG_TOO_FAR) \ FNe(MAX) /** @@ -80,6 +83,8 @@ enum skb_drop_reason { * @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case) */ SKB_NOT_DROPPED_YET = 0, + /** @SKB_CONSUMED: packet has been consumed */ + SKB_CONSUMED, /** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */ SKB_DROP_REASON_NOT_SPECIFIED, /** @SKB_DROP_REASON_NO_SOCKET: socket not found */ @@ -298,6 +303,15 @@ enum skb_drop_reason { * MTU) */ SKB_DROP_REASON_PKT_TOO_BIG, + /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */ + SKB_DROP_REASON_DUP_FRAG, + /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */ + SKB_DROP_REASON_FRAG_REASM_TIMEOUT, + /** + * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far. + * (/proc/sys/net/ipv4/ipfrag_max_dist) + */ + SKB_DROP_REASON_FRAG_TOO_FAR, /** * @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be * used as a real 'reason' diff --git a/include/net/dsa.h b/include/net/dsa.h index ee369670e20e..96086289aa9b 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -22,6 +22,7 @@ #include <net/devlink.h> #include <net/switchdev.h> +struct dsa_8021q_context; struct tc_action; struct phy_device; struct fixed_phy_status; @@ -118,10 +119,6 @@ struct dsa_netdevice_ops { int cmd); }; -#define DSA_TAG_DRIVER_ALIAS "dsa_tag-" -#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \ - MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE)) - struct dsa_lag { struct net_device *dev; unsigned int id; @@ -880,9 +877,6 @@ struct dsa_switch_ops { */ void (*phylink_get_caps)(struct dsa_switch *ds, int port, struct phylink_config *config); - void (*phylink_validate)(struct dsa_switch *ds, int port, - unsigned long *supported, - struct phylink_link_state *state); struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds, int port, phy_interface_t iface); @@ -1292,8 +1286,6 @@ struct dsa_switch_driver { const struct dsa_switch_ops *ops; }; -struct net_device *dsa_dev_to_net_device(struct device *dev); - bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port, const unsigned char *addr, u16 vid, struct dsa_db db); @@ -1403,70 +1395,4 @@ static inline bool dsa_slave_dev_check(const struct net_device *dev) netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev); void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up); -struct dsa_tag_driver { - const struct dsa_device_ops *ops; - struct list_head list; - struct module *owner; -}; - -void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count, - struct module *owner); -void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[], - unsigned int count); - -#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \ -static int __init dsa_tag_driver_module_init(void) \ -{ \ - dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \ - THIS_MODULE); \ - return 0; \ -} \ -module_init(dsa_tag_driver_module_init); \ - \ -static void __exit dsa_tag_driver_module_exit(void) \ -{ \ - dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \ -} \ -module_exit(dsa_tag_driver_module_exit) - -/** - * module_dsa_tag_drivers() - Helper macro for registering DSA tag - * drivers - * @__ops_array: Array of tag driver structures - * - * Helper macro for DSA tag drivers which do not do anything special - * in module init/exit. Each module may only use this macro once, and - * calling it replaces module_init() and module_exit(). - */ -#define module_dsa_tag_drivers(__ops_array) \ -dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array)) - -#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops - -/* Create a static structure we can build a linked list of dsa_tag - * drivers - */ -#define DSA_TAG_DRIVER(__ops) \ -static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \ - .ops = &__ops, \ -} - -/** - * module_dsa_tag_driver() - Helper macro for registering a single DSA tag - * driver - * @__ops: Single tag driver structures - * - * Helper macro for DSA tag drivers which do not do anything special - * in module init/exit. Each module may only use this macro once, and - * calling it replaces module_init() and module_exit(). - */ -#define module_dsa_tag_driver(__ops) \ -DSA_TAG_DRIVER(__ops); \ - \ -static struct dsa_tag_driver *dsa_tag_driver_array[] = { \ - &DSA_TAG_DRIVER_NAME(__ops) \ -}; \ -module_dsa_tag_drivers(dsa_tag_driver_array) #endif - diff --git a/include/net/dst.h b/include/net/dst.h index 00b479ce6b99..d67fda89cd0f 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -356,9 +356,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev, struct net *net) { - /* TODO : stats should be SMP safe */ - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; + DEV_STATS_INC(dev, rx_packets); + DEV_STATS_ADD(dev, rx_bytes, skb->len); __skb_tunnel_rx(skb, dev, net); } diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index a454cf4327fe..1b7fae4c6b24 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -26,6 +26,7 @@ struct macsec_info { struct xfrm_md_info { u32 if_id; int link; + struct dst_entry *dst_orig; }; struct metadata_dst { diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h index e343f9f8363e..0400a0ac8a29 100644 --- a/include/net/flow_offload.h +++ b/include/net/flow_offload.h @@ -32,6 +32,10 @@ struct flow_match_vlan { struct flow_dissector_key_vlan *key, *mask; }; +struct flow_match_arp { + struct flow_dissector_key_arp *key, *mask; +}; + struct flow_match_ipv4_addrs { struct flow_dissector_key_ipv4_addrs *key, *mask; }; @@ -98,6 +102,8 @@ void flow_rule_match_vlan(const struct flow_rule *rule, struct flow_match_vlan *out); void flow_rule_match_cvlan(const struct flow_rule *rule, struct flow_match_vlan *out); +void flow_rule_match_arp(const struct flow_rule *rule, + struct flow_match_arp *out); void flow_rule_match_ipv4_addrs(const struct flow_rule *rule, struct flow_match_ipv4_addrs *out); void flow_rule_match_ipv6_addrs(const struct flow_rule *rule, @@ -155,6 +161,7 @@ enum flow_action_id { FLOW_ACTION_MARK, FLOW_ACTION_PTYPE, FLOW_ACTION_PRIORITY, + FLOW_ACTION_RX_QUEUE_MAPPING, FLOW_ACTION_WAKE, FLOW_ACTION_QUEUE, FLOW_ACTION_SAMPLE, @@ -247,6 +254,7 @@ struct flow_action_entry { u32 csum_flags; /* FLOW_ACTION_CSUM */ u32 mark; /* FLOW_ACTION_MARK */ u16 ptype; /* FLOW_ACTION_PTYPE */ + u16 rx_queue; /* FLOW_ACTION_RX_QUEUE_MAPPING */ u32 priority; /* FLOW_ACTION_PRIORITY */ struct { /* FLOW_ACTION_QUEUE */ u32 ctx; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 524b510f1c68..9467e33dfb36 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -200,6 +200,7 @@ static void fq_tin_enqueue(struct fq *fq, fq_skb_free_t free_func) { struct fq_flow *flow; + struct sk_buff *next; bool oom; lockdep_assert_held(&fq->lock); @@ -214,11 +215,15 @@ static void fq_tin_enqueue(struct fq *fq, } flow->tin = tin; - flow->backlog += skb->len; - tin->backlog_bytes += skb->len; - tin->backlog_packets++; - fq->memory_usage += skb->truesize; - fq->backlog++; + skb_list_walk_safe(skb, skb, next) { + skb_mark_not_on_list(skb); + flow->backlog += skb->len; + tin->backlog_bytes += skb->len; + tin->backlog_packets++; + fq->memory_usage += skb->truesize; + fq->backlog++; + __skb_queue_tail(&flow->queue, skb); + } if (list_empty(&flow->flowchain)) { flow->deficit = fq->quantum; @@ -226,7 +231,6 @@ static void fq_tin_enqueue(struct fq *fq, &tin->new_flows); } - __skb_queue_tail(&flow->queue, skb); oom = (fq->memory_usage > fq->memory_limit); while (fq->backlog > fq->limit || oom) { flow = fq_find_fattest_flow(fq); diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 9f97f73615b6..ed4622dd4828 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -18,12 +18,11 @@ struct genl_multicast_group { u8 flags; }; -struct genl_ops; +struct genl_split_ops; struct genl_info; /** * struct genl_family - generic netlink family - * @id: protocol family identifier (private) * @hdrsize: length of user specific header in bytes * @name: name of family * @version: protocol version @@ -43,12 +42,13 @@ struct genl_info; * @resv_start_op: first operation for which reserved fields of the header * can be validated and policies are required (see below); * new families should leave this field at zero - * @mcgrp_offset: starting number of multicast group IDs in this family - * (private) * @ops: the operations supported by this family * @n_ops: number of operations supported by this family * @small_ops: the small-struct operations supported by this family * @n_small_ops: number of small-struct operations supported by this family + * @split_ops: the split do/dump form of operation definition + * @n_split_ops: number of entries in @split_ops, not that with split do/dump + * ops the number of entries is not the same as number of commands * * Attribute policies (the combination of @policy and @maxattr fields) * can be attached at the family level or at the operation level. @@ -58,29 +58,35 @@ struct genl_info; * if policy is not provided core will reject all TLV attributes. */ struct genl_family { - int id; /* private */ unsigned int hdrsize; char name[GENL_NAMSIZ]; unsigned int version; unsigned int maxattr; - unsigned int mcgrp_offset; /* private */ u8 netnsok:1; u8 parallel_ops:1; u8 n_ops; u8 n_small_ops; + u8 n_split_ops; u8 n_mcgrps; u8 resv_start_op; const struct nla_policy *policy; - int (*pre_doit)(const struct genl_ops *ops, + int (*pre_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); - void (*post_doit)(const struct genl_ops *ops, + void (*post_doit)(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); const struct genl_ops * ops; const struct genl_small_ops *small_ops; + const struct genl_split_ops *split_ops; const struct genl_multicast_group *mcgrps; struct module *module; + +/* private: internal use only */ + /* protocol family identifier */ + int id; + /* starting number of multicast group IDs in this family */ + unsigned int mcgrp_offset; }; /** @@ -119,6 +125,9 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) #define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg) +#define GENL_SET_ERR_MSG_FMT(info, msg, args...) \ + NL_SET_ERR_MSG_FMT((info)->extack, msg, ##args) + /* Report that a root attribute is missing */ #define GENL_REQ_ATTR_CHECK(info, attr) ({ \ struct genl_info *__info = (info); \ @@ -182,6 +191,58 @@ struct genl_ops { }; /** + * struct genl_split_ops - generic netlink operations (do/dump split version) + * @cmd: command identifier + * @internal_flags: flags used by the family + * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM) + * @validate: validation flags from enum genl_validate_flags + * @policy: netlink policy (takes precedence over family policy) + * @maxattr: maximum number of attributes supported + * + * Do callbacks: + * @pre_doit: called before an operation's @doit callback, it may + * do additional, common, filtering and return an error + * @doit: standard command callback + * @post_doit: called after an operation's @doit callback, it may + * undo operations done by pre_doit, for example release locks + * + * Dump callbacks: + * @start: start callback for dumps + * @dumpit: callback for dumpers + * @done: completion callback for dumps + * + * Do callbacks can be used if %GENL_CMD_CAP_DO is set in @flags. + * Dump callbacks can be used if %GENL_CMD_CAP_DUMP is set in @flags. + * Exactly one of those flags must be set. + */ +struct genl_split_ops { + union { + struct { + int (*pre_doit)(const struct genl_split_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + int (*doit)(struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(const struct genl_split_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + }; + struct { + int (*start)(struct netlink_callback *cb); + int (*dumpit)(struct sk_buff *skb, + struct netlink_callback *cb); + int (*done)(struct netlink_callback *cb); + }; + }; + const struct nla_policy *policy; + unsigned int maxattr; + u8 cmd; + u8 internal_flags; + u8 flags; + u8 validate; +}; + +/** * struct genl_dumpit_info - info that is available during dumpit op call * @family: generic netlink family - for internal genl code usage * @op: generic netlink ops - for internal genl code usage @@ -189,7 +250,7 @@ struct genl_ops { */ struct genl_dumpit_info { const struct genl_family *family; - struct genl_ops op; + struct genl_split_ops op; struct nlattr **attrs; }; diff --git a/include/net/geneve.h b/include/net/geneve.h index bced0b1d9fe4..5c96827a487e 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -59,7 +59,7 @@ struct genevehdr { __be16 proto_type; u8 vni[3]; u8 rsvd2; - struct geneve_opt options[]; + u8 options[]; }; static inline bool netif_is_geneve(const struct net_device *dev) diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 03b64bf876a4..4c33a20ea57f 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -85,6 +85,14 @@ struct ieee802154_hdr_fc { #endif }; +enum ieee802154_frame_version { + IEEE802154_2003_STD, + IEEE802154_2006_STD, + IEEE802154_STD, + IEEE802154_RESERVED_STD, + IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD, +}; + struct ieee802154_hdr { struct ieee802154_hdr_fc fc; u8 seq; diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 0b0876610553..b23ddec3cd5c 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -7,6 +7,7 @@ #include <linux/in6.h> #include <linux/rbtree_types.h> #include <linux/refcount.h> +#include <net/dropreason.h> /* Per netns frag queues directory */ struct fqdir { @@ -34,12 +35,14 @@ struct fqdir { * @INET_FRAG_LAST_IN: final fragment has arrived * @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction * @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable + * @INET_FRAG_DROP: if skbs must be dropped (instead of being consumed) */ enum { INET_FRAG_FIRST_IN = BIT(0), INET_FRAG_LAST_IN = BIT(1), INET_FRAG_COMPLETE = BIT(2), INET_FRAG_HASH_DEAD = BIT(3), + INET_FRAG_DROP = BIT(4), }; struct frag_v4_compare_key { @@ -139,7 +142,8 @@ void inet_frag_destroy(struct inet_frag_queue *q); struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key); /* Free all skbs in the queue; return the sum of their truesizes. */ -unsigned int inet_frag_rbtree_purge(struct rb_root *root); +unsigned int inet_frag_rbtree_purge(struct rb_root *root, + enum skb_drop_reason reason); static inline void inet_frag_put(struct inet_frag_queue *q) { diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index ff1804a0c469..c6c61100d244 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -29,6 +29,7 @@ #include <net/netfilter/nf_conntrack.h> #endif #include <net/net_namespace.h> /* Netw namespace */ +#include <linux/sched/isolation.h> #define IP_VS_HDR_INVERSE 1 #define IP_VS_HDR_ICMP 2 @@ -42,6 +43,8 @@ static inline struct netns_ipvs *net_ipvs(struct net* net) /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; +extern struct mutex __ip_vs_mutex; + struct ip_vs_iphdr { int hdr_flags; /* ipvs flags */ __u32 off; /* Where IP or IPv4 header starts */ @@ -351,11 +354,11 @@ struct ip_vs_seq { /* counters per cpu */ struct ip_vs_counters { - __u64 conns; /* connections scheduled */ - __u64 inpkts; /* incoming packets */ - __u64 outpkts; /* outgoing packets */ - __u64 inbytes; /* incoming bytes */ - __u64 outbytes; /* outgoing bytes */ + u64_stats_t conns; /* connections scheduled */ + u64_stats_t inpkts; /* incoming packets */ + u64_stats_t outpkts; /* outgoing packets */ + u64_stats_t inbytes; /* incoming bytes */ + u64_stats_t outbytes; /* outgoing bytes */ }; /* Stats per cpu */ struct ip_vs_cpu_stats { @@ -363,9 +366,12 @@ struct ip_vs_cpu_stats { struct u64_stats_sync syncp; }; +/* Default nice for estimator kthreads */ +#define IPVS_EST_NICE 0 + /* IPVS statistics objects */ struct ip_vs_estimator { - struct list_head list; + struct hlist_node list; u64 last_inbytes; u64 last_outbytes; @@ -378,6 +384,10 @@ struct ip_vs_estimator { u64 outpps; u64 inbps; u64 outbps; + + s32 ktid:16, /* kthread ID, -1=temp list */ + ktrow:8, /* row/tick ID for kthread */ + ktcid:8; /* chain ID for kthread tick */ }; /* @@ -405,6 +415,76 @@ struct ip_vs_stats { struct ip_vs_kstats kstats0; /* reset values */ }; +struct ip_vs_stats_rcu { + struct ip_vs_stats s; + struct rcu_head rcu_head; +}; + +int ip_vs_stats_init_alloc(struct ip_vs_stats *s); +struct ip_vs_stats *ip_vs_stats_alloc(void); +void ip_vs_stats_release(struct ip_vs_stats *stats); +void ip_vs_stats_free(struct ip_vs_stats *stats); + +/* Process estimators in multiple timer ticks (20/50/100, see ktrow) */ +#define IPVS_EST_NTICKS 50 +/* Estimation uses a 2-second period containing ticks (in jiffies) */ +#define IPVS_EST_TICK ((2 * HZ) / IPVS_EST_NTICKS) + +/* Limit of CPU load per kthread (8 for 12.5%), ratio of CPU capacity (1/C). + * Value of 4 and above ensures kthreads will take work without exceeding + * the CPU capacity under different circumstances. + */ +#define IPVS_EST_LOAD_DIVISOR 8 + +/* Kthreads should not have work that exceeds the CPU load above 50% */ +#define IPVS_EST_CPU_KTHREADS (IPVS_EST_LOAD_DIVISOR / 2) + +/* Desired number of chains per timer tick (chain load factor in 100us units), + * 48=4.8ms of 40ms tick (12% CPU usage): + * 2 sec * 1000 ms in sec * 10 (100us in ms) / 8 (12.5%) / 50 + */ +#define IPVS_EST_CHAIN_FACTOR \ + ALIGN_DOWN(2 * 1000 * 10 / IPVS_EST_LOAD_DIVISOR / IPVS_EST_NTICKS, 8) + +/* Compiled number of chains per tick + * The defines should match cond_resched_rcu + */ +#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU) +#define IPVS_EST_TICK_CHAINS IPVS_EST_CHAIN_FACTOR +#else +#define IPVS_EST_TICK_CHAINS 1 +#endif + +#if IPVS_EST_NTICKS > 127 +#error Too many timer ticks for ktrow +#endif + +/* Multiple chains processed in same tick */ +struct ip_vs_est_tick_data { + struct hlist_head chains[IPVS_EST_TICK_CHAINS]; + DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS); + DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS); + int chain_len[IPVS_EST_TICK_CHAINS]; +}; + +/* Context for estimation kthread */ +struct ip_vs_est_kt_data { + struct netns_ipvs *ipvs; + struct task_struct *task; /* task if running */ + struct ip_vs_est_tick_data __rcu *ticks[IPVS_EST_NTICKS]; + DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */ + unsigned long est_timer; /* estimation timer (jiffies) */ + struct ip_vs_stats *calc_stats; /* Used for calculation */ + int tick_len[IPVS_EST_NTICKS]; /* est count */ + int id; /* ktid per netns */ + int chain_max; /* max ests per tick chain */ + int tick_max; /* max ests per tick */ + int est_count; /* attached ests to kthread */ + int est_max_count; /* max ests per kthread */ + int add_row; /* row for new ests */ + int est_row; /* estimated row */ +}; + struct dst_entry; struct iphdr; struct ip_vs_conn; @@ -688,6 +768,7 @@ struct ip_vs_dest { union nf_inet_addr vaddr; /* virtual IP address */ __u32 vfwmark; /* firewall mark of service */ + struct rcu_head rcu_head; struct list_head t_list; /* in dest_trash */ unsigned int in_rs_table:1; /* we are in rs_table */ }; @@ -869,7 +950,7 @@ struct netns_ipvs { atomic_t conn_count; /* connection counter */ /* ip_vs_ctl */ - struct ip_vs_stats tot_stats; /* Statistics & est. */ + struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */ int num_services; /* no of virtual services */ int num_services6; /* IPv6 virtual services */ @@ -932,6 +1013,12 @@ struct netns_ipvs { int sysctl_schedule_icmp; int sysctl_ignore_tunneled; int sysctl_run_estimation; +#ifdef CONFIG_SYSCTL + cpumask_var_t sysctl_est_cpulist; /* kthread cpumask */ + int est_cpulist_valid; /* cpulist set */ + int sysctl_est_nice; /* kthread nice */ + int est_stopped; /* stop tasks */ +#endif /* ip_vs_lblc */ int sysctl_lblc_expiration; @@ -942,9 +1029,17 @@ struct netns_ipvs { struct ctl_table_header *lblcr_ctl_header; struct ctl_table *lblcr_ctl_table; /* ip_vs_est */ - struct list_head est_list; /* estimator list */ - spinlock_t est_lock; - struct timer_list est_timer; /* Estimation timer */ + struct delayed_work est_reload_work;/* Reload kthread tasks */ + struct mutex est_mutex; /* protect kthread tasks */ + struct hlist_head est_temp_list; /* Ests during calc phase */ + struct ip_vs_est_kt_data **est_kt_arr; /* Array of kthread data ptrs */ + unsigned long est_max_threads;/* Hard limit of kthreads */ + int est_calc_phase; /* Calculation phase */ + int est_chain_max; /* Calculated chain_max */ + int est_kt_count; /* Allocated ptrs */ + int est_add_ktid; /* ktid where to add ests */ + atomic_t est_genid; /* kthreads reload genid */ + atomic_t est_genid_done; /* applied genid */ /* ip_vs_sync */ spinlock_t sync_lock; struct ipvs_master_sync_state *ms; @@ -1077,6 +1172,19 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return ipvs->sysctl_run_estimation; } +static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +{ + if (ipvs->est_cpulist_valid) + return ipvs->sysctl_est_cpulist; + else + return housekeeping_cpumask(HK_TYPE_KTHREAD); +} + +static inline int sysctl_est_nice(struct netns_ipvs *ipvs) +{ + return ipvs->sysctl_est_nice; +} + #else static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs) @@ -1174,6 +1282,16 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs) return 1; } +static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs) +{ + return housekeeping_cpumask(HK_TYPE_KTHREAD); +} + +static inline int sysctl_est_nice(struct netns_ipvs *ipvs) +{ + return IPVS_EST_NICE; +} + #endif /* IPVS core functions @@ -1475,10 +1593,41 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state); void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts); /* IPVS rate estimator prototypes (from ip_vs_est.c) */ -void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); +int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats); void ip_vs_zero_estimator(struct ip_vs_stats *stats); void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats); +void ip_vs_est_reload_start(struct netns_ipvs *ipvs); +int ip_vs_est_kthread_start(struct netns_ipvs *ipvs, + struct ip_vs_est_kt_data *kd); +void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd); + +static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + /* Stop tasks while cpulist is empty or if disabled with flag */ + ipvs->est_stopped = !sysctl_run_estimation(ipvs) || + (ipvs->est_cpulist_valid && + cpumask_empty(sysctl_est_cpulist(ipvs))); +#endif +} + +static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs) +{ +#ifdef CONFIG_SYSCTL + return ipvs->est_stopped; +#else + return false; +#endif +} + +static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs) +{ + unsigned int limit = IPVS_EST_CPU_KTHREADS * + cpumask_weight(sysctl_est_cpulist(ipvs)); + + return max(1U, limit); +} /* Various IPVS packet transmitters (from ip_vs_xmit.c) */ int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d383c895592a..03f3af02a9a6 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -500,6 +500,39 @@ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb) return jhdr->nexthdr; } +/* Return 0 if HBH header is successfully removed + * Or if HBH removal is unnecessary (packet is not big TCP) + * Return error to indicate dropping the packet + */ +static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb) +{ + const int hophdr_len = sizeof(struct hop_jumbo_hdr); + int nexthdr = ipv6_has_hopopt_jumbo(skb); + struct ipv6hdr *h6; + + if (!nexthdr) + return 0; + + if (skb_cow_head(skb, 0)) + return -1; + + /* Remove the HBH header. + * Layout: [Ethernet header][IPv6 header][HBH][L4 Header] + */ + memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb), + skb_network_header(skb) - skb_mac_header(skb) + + sizeof(struct ipv6hdr)); + + __skb_pull(skb, hophdr_len); + skb->network_header += hophdr_len; + skb->mac_header += hophdr_len; + + h6 = ipv6_hdr(skb); + h6->nexthdr = nexthdr; + + return 0; +} + static inline bool ipv6_accept_ra(struct inet6_dev *idev) { /* If forwarding is enabled, RA are not accepted unless the special diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h index 5052c66e22d2..7321ffe3a108 100644 --- a/include/net/ipv6_frag.h +++ b/include/net/ipv6_frag.h @@ -76,6 +76,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) if (fq->q.flags & INET_FRAG_COMPLETE) goto out; + fq->q.flags |= INET_FRAG_DROP; inet_frag_kill(&fq->q); dev = dev_get_by_index_rcu(net, fq->iif); @@ -101,7 +102,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq) spin_unlock(&fq->q.lock); icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); - kfree_skb(head); + kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT); goto out_rcu_unlock; out: diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ac2bad57933f..689da327ce2e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -89,15 +89,13 @@ /** * DOC: mac80211 software tx queueing * - * mac80211 provides an optional intermediate queueing implementation designed - * to allow the driver to keep hardware queues short and provide some fairness - * between different stations/interfaces. - * In this model, the driver pulls data frames from the mac80211 queue instead - * of letting mac80211 push them via drv_tx(). - * Other frames (e.g. control or management) are still pushed using drv_tx(). + * mac80211 uses an intermediate queueing implementation, designed to allow the + * driver to keep hardware queues short and to provide some fairness between + * different stations/interfaces. * - * Drivers indicate that they use this model by implementing the .wake_tx_queue - * driver operation. + * Drivers must provide the .wake_tx_queue driver operation by either + * linking it to ieee80211_handle_wake_tx_queue() or implementing a custom + * handler. * * Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with * another per-sta for non-data/non-mgmt and bufferable management frames, and @@ -106,9 +104,12 @@ * The driver is expected to initialize its private per-queue data for stations * and interfaces in the .add_interface and .sta_add ops. * - * The driver can't access the queue directly. To dequeue a frame from a - * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a - * queue, it calls the .wake_tx_queue driver op. + * The driver can't access the internal TX queues (iTXQs) directly. + * Whenever mac80211 adds a new frame to a queue, it calls the .wake_tx_queue + * driver op. + * Drivers implementing a custom .wake_tx_queue op can get them by calling + * ieee80211_tx_dequeue(). Drivers using ieee80211_handle_wake_tx_queue() will + * simply get the individual frames pushed via the .tx driver operation. * * Drivers can optionally delegate responsibility for scheduling queues to * mac80211, to take advantage of airtime fairness accounting. In this case, to @@ -1806,6 +1807,10 @@ struct ieee80211_vif_cfg { * @addr: address of this interface * @p2p: indicates whether this AP or STA interface is a p2p * interface, i.e. a GO or p2p-sta respectively + * @netdev_features: tx netdev features supported by the hardware for this + * vif. mac80211 initializes this to hw->netdev_features, and the driver + * can mask out specific tx features. mac80211 will handle software fixup + * for masked offloads (GSO, CSUM) * @driver_flags: flags/capabilities the driver has for this interface, * these need to be set (or cleared) when the interface is added * or, if supported by the driver, the interface type is changed @@ -1826,7 +1831,7 @@ struct ieee80211_vif_cfg { * for this interface. * @drv_priv: data area for driver use, will always be aligned to * sizeof(void \*). - * @txq: the multicast data TX queue (if driver uses the TXQ abstraction) + * @txq: the multicast data TX queue * @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped, * protected by fq->lock. * @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see @@ -1847,6 +1852,7 @@ struct ieee80211_vif { struct ieee80211_txq *txq; + netdev_features_t netdev_features; u32 driver_flags; u32 offload_flags; @@ -1915,6 +1921,10 @@ static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif) rcu_dereference_protected((vif)->link_conf[link_id], \ lockdep_vif_mutex_held(vif)) +#define link_conf_dereference_check(vif, link_id) \ + rcu_dereference_check((vif)->link_conf[link_id], \ + lockdep_vif_mutex_held(vif)) + /** * enum ieee80211_key_flags - key flags * @@ -2176,6 +2186,7 @@ struct ieee80211_sta_aggregates { * All link specific info for a STA link for a non MLD STA(single) * or a MLD STA(multiple entries) are stored here. * + * @sta: reference to owning STA * @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr * in ieee80211_sta. For MLO Link STA this addr can be same or different * from addr in ieee80211_sta (representing MLD STA addr) @@ -2196,6 +2207,8 @@ struct ieee80211_sta_aggregates { * */ struct ieee80211_link_sta { + struct ieee80211_sta *sta; + u8 addr[ETH_ALEN]; u8 link_id; enum ieee80211_smps_mode smps_mode; @@ -2252,8 +2265,8 @@ struct ieee80211_link_sta { * For non MLO STA it will point to the deflink data. For MLO STA * ieee80211_sta_recalc_aggregates() must be called to update it. * @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not. - * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that - * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames + * @txq: per-TID data TX queues; note that the last entry (%IEEE80211_NUM_TIDS) + * is used for non-data frames * @deflink: This holds the default link STA information, for non MLO STA all link * specific STA information is accessed through @deflink or through * link[0] which points to address of @deflink. For MLO Link STA @@ -2308,6 +2321,10 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) rcu_dereference_protected((sta)->link[link_id], \ lockdep_sta_mutex_held(sta)) +#define link_sta_dereference_check(sta, link_id) \ + rcu_dereference_check((sta)->link[link_id], \ + lockdep_sta_mutex_held(sta)) + #define for_each_sta_active_link(vif, sta, link_sta, link_id) \ for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \ if ((!(vif)->active_links || \ @@ -3787,6 +3804,13 @@ struct ieee80211_prep_tx_info { * should be within a CONFIG_MAC80211_DEBUGFS conditional. This * callback can sleep. * + * @link_sta_add_debugfs: Drivers can use this callback to add debugfs files + * when a link is added to a mac80211 station. This callback + * should be within a CPTCFG_MAC80211_DEBUGFS conditional. This + * callback can sleep. + * For non-MLO the callback will be called once for the deflink with the + * station's directory rather than a separate subdirectory. + * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating * in AP mode, this callback will not be called when the flag @@ -4257,6 +4281,10 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); + void (*link_sta_add_debugfs)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_link_sta *link_sta, + struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); @@ -5691,7 +5719,7 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). * - * Drivers should use this function instead of netif_wake_queue. + * Drivers must use this function instead of netif_wake_queue. */ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); @@ -5700,7 +5728,7 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_stop_queue. */ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); @@ -5709,7 +5737,7 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue); * @hw: pointer as obtained from ieee80211_alloc_hw(). * @queue: queue number (counted from zero). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_queue_stopped. * * Return: %true if the queue is stopped. %false otherwise. */ @@ -5720,7 +5748,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue); * ieee80211_stop_queues - stop all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). * - * Drivers should use this function instead of netif_stop_queue. + * Drivers must use this function instead of netif_tx_stop_all_queues. */ void ieee80211_stop_queues(struct ieee80211_hw *hw); @@ -5728,7 +5756,7 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw); * ieee80211_wake_queues - wake all queues * @hw: pointer as obtained from ieee80211_alloc_hw(). * - * Drivers should use this function instead of netif_wake_queue. + * Drivers must use this function instead of netif_tx_wake_all_queues. */ void ieee80211_wake_queues(struct ieee80211_hw *hw); @@ -6950,6 +6978,18 @@ static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw, } /** + * ieee80211_handle_wake_tx_queue - mac80211 handler for wake_tx_queue callback + * + * @hw: pointer as obtained from wake_tx_queue() callback(). + * @txq: pointer as obtained from wake_tx_queue() callback(). + * + * Drivers can use this function for the mandatory mac80211 wake_tx_queue + * callback in struct ieee80211_ops. They should not call this function. + */ +void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq); + +/** * ieee80211_next_txq - get next tx queue to pull packets from * * @hw: pointer as obtained from ieee80211_alloc_hw() diff --git a/include/net/mac802154.h b/include/net/mac802154.h index bdac0ddbdcdb..4a3a9de9da73 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -111,9 +111,6 @@ struct ieee802154_hw { * promiscuous mode setting. * * @IEEE802154_HW_RX_OMIT_CKSUM: Indicates that receiver omits FCS. - * - * @IEEE802154_HW_RX_DROP_BAD_CKSUM: Indicates that receiver will not filter - * frames with bad checksum. */ enum ieee802154_hw_flags { IEEE802154_HW_TX_OMIT_CKSUM = BIT(0), @@ -123,7 +120,6 @@ enum ieee802154_hw_flags { IEEE802154_HW_AFILT = BIT(4), IEEE802154_HW_PROMISCUOUS = BIT(5), IEEE802154_HW_RX_OMIT_CKSUM = BIT(6), - IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(7), }; /* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */ @@ -460,33 +456,6 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw); */ void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb, u8 lqi); -/** - * ieee802154_wake_queue - wake ieee802154 queue - * @hw: pointer as obtained from ieee802154_alloc_hw(). - * - * Tranceivers usually have either one transmit framebuffer or one framebuffer - * for both transmitting and receiving. Hence, the core currently only handles - * one frame at a time for each phy, which means we had to stop the queue to - * avoid new skb to come during the transmission. The queue then needs to be - * woken up after the operation. - * - * Drivers should use this function instead of netif_wake_queue. - */ -void ieee802154_wake_queue(struct ieee802154_hw *hw); - -/** - * ieee802154_stop_queue - stop ieee802154 queue - * @hw: pointer as obtained from ieee802154_alloc_hw(). - * - * Tranceivers usually have either one transmit framebuffer or one framebuffer - * for both transmitting and receiving. Hence, the core currently only handles - * one frame at a time for each phy, which means we need to tell upper layers to - * stop giving us new skbs while we are busy with the transmitted one. The queue - * must then be stopped before transmitting. - * - * Drivers should use this function instead of netif_stop_queue. - */ -void ieee802154_stop_queue(struct ieee802154_hw *hw); /** * ieee802154_xmit_complete - frame transmission complete diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h new file mode 100644 index 000000000000..d80c78506f19 --- /dev/null +++ b/include/net/mana/gdma.h @@ -0,0 +1,841 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _GDMA_H +#define _GDMA_H + +#include <linux/dma-mapping.h> +#include <linux/netdevice.h> + +#include "shm_channel.h" + +#define GDMA_STATUS_MORE_ENTRIES 0x00000105 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +enum gdma_request_type { + GDMA_VERIFY_VF_DRIVER_VERSION = 1, + GDMA_QUERY_MAX_RESOURCES = 2, + GDMA_LIST_DEVICES = 3, + GDMA_REGISTER_DEVICE = 4, + GDMA_DEREGISTER_DEVICE = 5, + GDMA_GENERATE_TEST_EQE = 10, + GDMA_CREATE_QUEUE = 12, + GDMA_DISABLE_QUEUE = 13, + GDMA_ALLOCATE_RESOURCE_RANGE = 22, + GDMA_DESTROY_RESOURCE_RANGE = 24, + GDMA_CREATE_DMA_REGION = 25, + GDMA_DMA_REGION_ADD_PAGES = 26, + GDMA_DESTROY_DMA_REGION = 27, + GDMA_CREATE_PD = 29, + GDMA_DESTROY_PD = 30, + GDMA_CREATE_MR = 31, + GDMA_DESTROY_MR = 32, +}; + +#define GDMA_RESOURCE_DOORBELL_PAGE 27 + +enum gdma_queue_type { + GDMA_INVALID_QUEUE, + GDMA_SQ, + GDMA_RQ, + GDMA_CQ, + GDMA_EQ, +}; + +enum gdma_work_request_flags { + GDMA_WR_NONE = 0, + GDMA_WR_OOB_IN_SGL = BIT(0), + GDMA_WR_PAD_BY_SGE0 = BIT(1), +}; + +enum gdma_eqe_type { + GDMA_EQE_COMPLETION = 3, + GDMA_EQE_TEST_EVENT = 64, + GDMA_EQE_HWC_INIT_EQ_ID_DB = 129, + GDMA_EQE_HWC_INIT_DATA = 130, + GDMA_EQE_HWC_INIT_DONE = 131, +}; + +enum { + GDMA_DEVICE_NONE = 0, + GDMA_DEVICE_HWC = 1, + GDMA_DEVICE_MANA = 2, +}; + +typedef u64 gdma_obj_handle_t; + +struct gdma_resource { + /* Protect the bitmap */ + spinlock_t lock; + + /* The bitmap size in bits. */ + u32 size; + + /* The bitmap tracks the resources. */ + unsigned long *map; +}; + +union gdma_doorbell_entry { + u64 as_uint64; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 31; + u64 arm : 1; + } cq; + + struct { + u64 id : 24; + u64 wqe_cnt : 8; + u64 tail_ptr : 32; + } rq; + + struct { + u64 id : 24; + u64 reserved : 8; + u64 tail_ptr : 32; + } sq; + + struct { + u64 id : 16; + u64 reserved : 16; + u64 tail_ptr : 31; + u64 arm : 1; + } eq; +}; /* HW DATA */ + +struct gdma_msg_hdr { + u32 hdr_type; + u32 msg_type; + u16 msg_version; + u16 hwc_msg_id; + u32 msg_size; +}; /* HW DATA */ + +struct gdma_dev_id { + union { + struct { + u16 type; + u16 instance; + }; + + u32 as_uint32; + }; +}; /* HW DATA */ + +struct gdma_req_hdr { + struct gdma_msg_hdr req; + struct gdma_msg_hdr resp; /* The expected response */ + struct gdma_dev_id dev_id; + u32 activity_id; +}; /* HW DATA */ + +struct gdma_resp_hdr { + struct gdma_msg_hdr response; + struct gdma_dev_id dev_id; + u32 activity_id; + u32 status; + u32 reserved; +}; /* HW DATA */ + +struct gdma_general_req { + struct gdma_req_hdr hdr; +}; /* HW DATA */ + +#define GDMA_MESSAGE_V1 1 + +struct gdma_general_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define GDMA_STANDARD_HEADER_TYPE 0 + +static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code, + u32 req_size, u32 resp_size) +{ + hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->req.msg_type = code; + hdr->req.msg_version = GDMA_MESSAGE_V1; + hdr->req.msg_size = req_size; + + hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE; + hdr->resp.msg_type = code; + hdr->resp.msg_version = GDMA_MESSAGE_V1; + hdr->resp.msg_size = resp_size; +} + +/* The 16-byte struct is part of the GDMA work queue entry (WQE). */ +struct gdma_sge { + u64 address; + u32 mem_key; + u32 size; +}; /* HW DATA */ + +struct gdma_wqe_request { + struct gdma_sge *sgl; + u32 num_sge; + + u32 inline_oob_size; + const void *inline_oob_data; + + u32 flags; + u32 client_data_unit; +}; + +enum gdma_page_type { + GDMA_PAGE_TYPE_4K, +}; + +#define GDMA_INVALID_DMA_REGION 0 + +struct gdma_mem_info { + struct device *dev; + + dma_addr_t dma_handle; + void *virt_addr; + u64 length; + + /* Allocated by the PF driver */ + gdma_obj_handle_t dma_region_handle; +}; + +#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8 + +struct gdma_dev { + struct gdma_context *gdma_context; + + struct gdma_dev_id dev_id; + + u32 pdid; + u32 doorbell; + u32 gpa_mkey; + + /* GDMA driver specific pointer */ + void *driver_data; + + struct auxiliary_device *adev; +}; + +#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE + +#define GDMA_CQE_SIZE 64 +#define GDMA_EQE_SIZE 16 +#define GDMA_MAX_SQE_SIZE 512 +#define GDMA_MAX_RQE_SIZE 256 + +#define GDMA_COMP_DATA_SIZE 0x3C + +#define GDMA_EVENT_DATA_SIZE 0xC + +/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */ +#define GDMA_WQE_BU_SIZE 32 + +#define INVALID_PDID UINT_MAX +#define INVALID_DOORBELL UINT_MAX +#define INVALID_MEM_KEY UINT_MAX +#define INVALID_QUEUE_ID UINT_MAX +#define INVALID_PCI_MSIX_INDEX UINT_MAX + +struct gdma_comp { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + u32 wq_num; + bool is_sq; +}; + +struct gdma_event { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u8 type; +}; + +struct gdma_queue; + +struct mana_eq { + struct gdma_queue *eq; +}; + +typedef void gdma_eq_callback(void *context, struct gdma_queue *q, + struct gdma_event *e); + +typedef void gdma_cq_callback(void *context, struct gdma_queue *q); + +/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE + * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the + * driver increases the 'head' in BUs rather than in bytes, and notifies + * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track + * the HW head, and increases the 'head' by 1 for every processed EQE/CQE. + * + * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is + * processed, the driver increases the 'tail' to indicate that WQEs have + * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ. + * + * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures + * that the EQ/CQ is big enough so they can't overflow, and the driver uses + * the owner bits mechanism to detect if the queue has become empty. + */ +struct gdma_queue { + struct gdma_dev *gdma_dev; + + enum gdma_queue_type type; + u32 id; + + struct gdma_mem_info mem_info; + + void *queue_mem_ptr; + u32 queue_size; + + bool monitor_avl_buf; + + u32 head; + u32 tail; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + bool disable_needed; + + gdma_eq_callback *callback; + void *context; + + unsigned int msix_index; + + u32 log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent; /* For CQ/EQ relationship */ + } cq; + }; +}; + +struct gdma_queue_spec { + enum gdma_queue_type type; + bool monitor_avl_buf; + unsigned int queue_size; + + /* Extra fields specific to EQ/CQ. */ + union { + struct { + gdma_eq_callback *callback; + void *context; + + unsigned long log2_throttle_limit; + } eq; + + struct { + gdma_cq_callback *callback; + void *context; + + struct gdma_queue *parent_eq; + + } cq; + }; +}; + +struct gdma_irq_context { + void (*handler)(void *arg); + void *arg; +}; + +struct gdma_context { + struct device *dev; + + /* Per-vPort max number of queues */ + unsigned int max_num_queues; + unsigned int max_num_msix; + unsigned int num_msix_usable; + struct gdma_resource msix_resource; + struct gdma_irq_context *irq_contexts; + + /* This maps a CQ index to the queue structure. */ + unsigned int max_num_cqs; + struct gdma_queue **cq_table; + + /* Protect eq_test_event and test_event_eq_id */ + struct mutex eq_test_event_mutex; + struct completion eq_test_event; + u32 test_event_eq_id; + + bool is_pf; + phys_addr_t bar0_pa; + void __iomem *bar0_va; + void __iomem *shm_base; + void __iomem *db_page_base; + phys_addr_t phys_db_page_base; + u32 db_page_size; + int numa_node; + + /* Shared memory chanenl (used to bootstrap HWC) */ + struct shm_channel shm_channel; + + /* Hardware communication channel (HWC) */ + struct gdma_dev hwc; + + /* Azure network adapter */ + struct gdma_dev mana; +}; + +#define MAX_NUM_GDMA_DEVICES 4 + +static inline bool mana_gd_is_mana(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_MANA; +} + +static inline bool mana_gd_is_hwc(struct gdma_dev *gd) +{ + return gd->dev_id.type == GDMA_DEVICE_HWC; +} + +u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset); +u32 mana_gd_wq_avail_space(struct gdma_queue *wq); + +int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq); + +int mana_gd_create_hwc_queue(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_eq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +int mana_gd_create_mana_wq_cq(struct gdma_dev *gd, + const struct gdma_queue_spec *spec, + struct gdma_queue **queue_ptr); + +void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue); + +int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe); + +void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit); + +struct gdma_wqe { + u32 reserved :24; + u32 last_vbytes :8; + + union { + u32 flags; + + struct { + u32 num_sge :8; + u32 inline_oob_size_div4:3; + u32 client_oob_in_sgl :1; + u32 reserved1 :4; + u32 client_data_unit :14; + u32 reserved2 :2; + }; + }; +}; /* HW DATA */ + +#define INLINE_OOB_SMALL_SIZE 8 +#define INLINE_OOB_LARGE_SIZE 24 + +#define MAX_TX_WQE_SIZE 512 +#define MAX_RX_WQE_SIZE 256 + +#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \ + sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \ + sizeof(struct gdma_sge)) + +#define MAX_RX_WQE_SGL_ENTRIES ((GDMA_MAX_RQE_SIZE - \ + sizeof(struct gdma_sge)) / sizeof(struct gdma_sge)) + +struct gdma_cqe { + u32 cqe_data[GDMA_COMP_DATA_SIZE / 4]; + + union { + u32 as_uint32; + + struct { + u32 wq_num : 24; + u32 is_sq : 1; + u32 reserved : 4; + u32 owner_bits : 3; + }; + } cqe_info; +}; /* HW DATA */ + +#define GDMA_CQE_OWNER_BITS 3 + +#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1) + +#define SET_ARM_BIT 1 + +#define GDMA_EQE_OWNER_BITS 3 + +union gdma_eqe_info { + u32 as_uint32; + + struct { + u32 type : 8; + u32 reserved1 : 8; + u32 client_id : 2; + u32 reserved2 : 11; + u32 owner_bits : 3; + }; +}; /* HW DATA */ + +#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1) +#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries)) + +struct gdma_eqe { + u32 details[GDMA_EVENT_DATA_SIZE / 4]; + u32 eqe_info; +}; /* HW DATA */ + +#define GDMA_REG_DB_PAGE_OFFSET 8 +#define GDMA_REG_DB_PAGE_SIZE 0x10 +#define GDMA_REG_SHM_OFFSET 0x18 + +#define GDMA_PF_REG_DB_PAGE_SIZE 0xD0 +#define GDMA_PF_REG_DB_PAGE_OFF 0xC8 +#define GDMA_PF_REG_SHM_OFF 0x70 + +#define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108 + +#define MANA_PF_DEVICE_ID 0x00B9 +#define MANA_VF_DEVICE_ID 0x00BA + +struct gdma_posted_wqe_info { + u32 wqe_size_in_bu; +}; + +/* GDMA_GENERATE_TEST_EQE */ +struct gdma_generate_test_event_req { + struct gdma_req_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_VERIFY_VF_DRIVER_VERSION */ +enum { + GDMA_PROTOCOL_V1 = 1, + GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1, + GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1, +}; + +#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0) + +/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed, + * so the driver is able to reliably support features like busy_poll. + */ +#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2) + +#define GDMA_DRV_CAP_FLAGS1 \ + (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \ + GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX) + +#define GDMA_DRV_CAP_FLAGS2 0 + +#define GDMA_DRV_CAP_FLAGS3 0 + +#define GDMA_DRV_CAP_FLAGS4 0 + +struct gdma_verify_ver_req { + struct gdma_req_hdr hdr; + + /* Mandatory fields required for protocol establishment */ + u64 protocol_ver_min; + u64 protocol_ver_max; + + /* Gdma Driver Capability Flags */ + u64 gd_drv_cap_flags1; + u64 gd_drv_cap_flags2; + u64 gd_drv_cap_flags3; + u64 gd_drv_cap_flags4; + + /* Advisory fields */ + u64 drv_ver; + u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */ + u32 reserved; + u32 os_ver_major; + u32 os_ver_minor; + u32 os_ver_build; + u32 os_ver_platform; + u64 reserved_2; + u8 os_ver_str1[128]; + u8 os_ver_str2[128]; + u8 os_ver_str3[128]; + u8 os_ver_str4[128]; +}; /* HW DATA */ + +struct gdma_verify_ver_resp { + struct gdma_resp_hdr hdr; + u64 gdma_protocol_ver; + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; +}; /* HW DATA */ + +/* GDMA_QUERY_MAX_RESOURCES */ +struct gdma_query_max_resources_resp { + struct gdma_resp_hdr hdr; + u32 status; + u32 max_sq; + u32 max_rq; + u32 max_cq; + u32 max_eq; + u32 max_db; + u32 max_mst; + u32 max_cq_mod_ctx; + u32 max_mod_cq; + u32 max_msix; +}; /* HW DATA */ + +/* GDMA_LIST_DEVICES */ +struct gdma_list_devices_resp { + struct gdma_resp_hdr hdr; + u32 num_of_devs; + u32 reserved; + struct gdma_dev_id devs[64]; +}; /* HW DATA */ + +/* GDMA_REGISTER_DEVICE */ +struct gdma_register_device_resp { + struct gdma_resp_hdr hdr; + u32 pdid; + u32 gpa_mkey; + u32 db_id; +}; /* HW DATA */ + +struct gdma_allocate_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 alignment; + u32 allocated_resources; +}; + +struct gdma_allocate_resource_range_resp { + struct gdma_resp_hdr hdr; + u32 allocated_resources; +}; + +struct gdma_destroy_resource_range_req { + struct gdma_req_hdr hdr; + u32 resource_type; + u32 num_resources; + u32 allocated_resources; +}; + +/* GDMA_CREATE_QUEUE */ +struct gdma_create_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 reserved1; + u32 pdid; + u32 doolbell_id; + gdma_obj_handle_t gdma_region; + u32 reserved2; + u32 queue_size; + u32 log2_throttle_limit; + u32 eq_pci_msix_index; + u32 cq_mod_ctx_id; + u32 cq_parent_eq_id; + u8 rq_drop_on_overrun; + u8 rq_err_on_wqe_overflow; + u8 rq_chain_rec_wqes; + u8 sq_hw_db; + u32 reserved3; +}; /* HW DATA */ + +struct gdma_create_queue_resp { + struct gdma_resp_hdr hdr; + u32 queue_index; +}; /* HW DATA */ + +/* GDMA_DISABLE_QUEUE */ +struct gdma_disable_queue_req { + struct gdma_req_hdr hdr; + u32 type; + u32 queue_index; + u32 alloc_res_id_on_creation; +}; /* HW DATA */ + +enum atb_page_size { + ATB_PAGE_SIZE_4K, + ATB_PAGE_SIZE_8K, + ATB_PAGE_SIZE_16K, + ATB_PAGE_SIZE_32K, + ATB_PAGE_SIZE_64K, + ATB_PAGE_SIZE_128K, + ATB_PAGE_SIZE_256K, + ATB_PAGE_SIZE_512K, + ATB_PAGE_SIZE_1M, + ATB_PAGE_SIZE_2M, + ATB_PAGE_SIZE_MAX, +}; + +enum gdma_mr_access_flags { + GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0), + GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1), + GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2), + GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3), + GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4), +}; + +/* GDMA_CREATE_DMA_REGION */ +struct gdma_create_dma_region_req { + struct gdma_req_hdr hdr; + + /* The total size of the DMA region */ + u64 length; + + /* The offset in the first page */ + u32 offset_in_page; + + /* enum gdma_page_type */ + u32 gdma_page_type; + + /* The total number of pages */ + u32 page_count; + + /* If page_addr_list_len is smaller than page_count, + * the remaining page addresses will be added via the + * message GDMA_DMA_REGION_ADD_PAGES. + */ + u32 page_addr_list_len; + u64 page_addr_list[]; +}; /* HW DATA */ + +struct gdma_create_dma_region_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t dma_region_handle; +}; /* HW DATA */ + +/* GDMA_DMA_REGION_ADD_PAGES */ +struct gdma_dma_region_add_pages_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; + + u32 page_addr_list_len; + u32 reserved3; + + u64 page_addr_list[]; +}; /* HW DATA */ + +/* GDMA_DESTROY_DMA_REGION */ +struct gdma_destroy_dma_region_req { + struct gdma_req_hdr hdr; + + gdma_obj_handle_t dma_region_handle; +}; /* HW DATA */ + +enum gdma_pd_flags { + GDMA_PD_FLAG_INVALID = 0, +}; + +struct gdma_create_pd_req { + struct gdma_req_hdr hdr; + enum gdma_pd_flags flags; + u32 reserved; +};/* HW DATA */ + +struct gdma_create_pd_resp { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t pd_handle; + u32 pd_id; + u32 reserved; +};/* HW DATA */ + +struct gdma_destroy_pd_req { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; +};/* HW DATA */ + +struct gdma_destory_pd_resp { + struct gdma_resp_hdr hdr; +};/* HW DATA */ + +enum gdma_mr_type { + /* Guest Virtual Address - MRs of this type allow access + * to memory mapped by PTEs associated with this MR using a virtual + * address that is set up in the MST + */ + GDMA_MR_TYPE_GVA = 2, +}; + +struct gdma_create_mr_params { + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + }; +}; + +struct gdma_create_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t pd_handle; + enum gdma_mr_type mr_type; + u32 reserved_1; + + union { + struct { + gdma_obj_handle_t dma_region_handle; + u64 virtual_address; + enum gdma_mr_access_flags access_flags; + } gva; + + }; + u32 reserved_2; +};/* HW DATA */ + +struct gdma_create_mr_response { + struct gdma_resp_hdr hdr; + gdma_obj_handle_t mr_handle; + u32 lkey; + u32 rkey; +};/* HW DATA */ + +struct gdma_destroy_mr_request { + struct gdma_req_hdr hdr; + gdma_obj_handle_t mr_handle; +};/* HW DATA */ + +struct gdma_destroy_mr_response { + struct gdma_resp_hdr hdr; +};/* HW DATA */ + +int mana_gd_verify_vf_version(struct pci_dev *pdev); + +int mana_gd_register_device(struct gdma_dev *gd); +int mana_gd_deregister_device(struct gdma_dev *gd); + +int mana_gd_post_work_request(struct gdma_queue *wq, + const struct gdma_wqe_request *wqe_req, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_post_and_ring(struct gdma_queue *queue, + const struct gdma_wqe_request *wqe, + struct gdma_posted_wqe_info *wqe_info); + +int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r); +void mana_gd_free_res_map(struct gdma_resource *r); + +void mana_gd_wq_ring_doorbell(struct gdma_context *gc, + struct gdma_queue *queue); + +int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length, + struct gdma_mem_info *gmi); + +void mana_gd_free_memory(struct gdma_mem_info *gmi); + +int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req, + u32 resp_len, void *resp); + +int mana_gd_destroy_dma_region(struct gdma_context *gc, + gdma_obj_handle_t dma_region_handle); + +#endif /* _GDMA_H */ diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h new file mode 100644 index 000000000000..6a757a6e2732 --- /dev/null +++ b/include/net/mana/hw_channel.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _HW_CHANNEL_H +#define _HW_CHANNEL_H + +#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4 + +#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000 +#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000 + +#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1 + +#define HWC_INIT_DATA_CQID 1 +#define HWC_INIT_DATA_RQID 2 +#define HWC_INIT_DATA_SQID 3 +#define HWC_INIT_DATA_QUEUE_DEPTH 4 +#define HWC_INIT_DATA_MAX_REQUEST 5 +#define HWC_INIT_DATA_MAX_RESPONSE 6 +#define HWC_INIT_DATA_MAX_NUM_CQS 7 +#define HWC_INIT_DATA_PDID 8 +#define HWC_INIT_DATA_GPA_MKEY 9 +#define HWC_INIT_DATA_PF_DEST_RQ_ID 10 +#define HWC_INIT_DATA_PF_DEST_CQ_ID 11 + +/* Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +union hwc_init_eq_id_db { + u32 as_uint32; + + struct { + u32 eq_id : 16; + u32 doorbell : 16; + }; +}; /* HW DATA */ + +union hwc_init_type_data { + u32 as_uint32; + + struct { + u32 value : 24; + u32 type : 8; + }; +}; /* HW DATA */ + +struct hwc_rx_oob { + u32 type : 6; + u32 eom : 1; + u32 som : 1; + u32 vendor_err : 8; + u32 reserved1 : 16; + + u32 src_virt_wq : 24; + u32 src_vfid : 8; + + u32 reserved2; + + union { + u32 wqe_addr_low; + u32 wqe_offset; + }; + + u32 wqe_addr_high; + + u32 client_data_unit : 14; + u32 reserved3 : 18; + + u32 tx_oob_data_size; + + u32 chunk_offset : 21; + u32 reserved4 : 11; +}; /* HW DATA */ + +struct hwc_tx_oob { + u32 reserved1; + + u32 reserved2; + + u32 vrq_id : 24; + u32 dest_vfid : 8; + + u32 vrcq_id : 24; + u32 reserved3 : 8; + + u32 vscq_id : 24; + u32 loopback : 1; + u32 lso_override: 1; + u32 dest_pf : 1; + u32 reserved4 : 5; + + u32 vsq_id : 24; + u32 reserved5 : 8; +}; /* HW DATA */ + +struct hwc_work_request { + void *buf_va; + void *buf_sge_addr; + u32 buf_len; + u32 msg_size; + + struct gdma_wqe_request wqe_req; + struct hwc_tx_oob tx_oob; + + struct gdma_sge sge; +}; + +/* hwc_dma_buf represents the array of in-flight WQEs. + * mem_info as know as the GDMA mapped memory is partitioned and used by + * in-flight WQEs. + * The number of WQEs is determined by the number of in-flight messages. + */ +struct hwc_dma_buf { + struct gdma_mem_info mem_info; + + u32 gpa_mkey; + + u32 num_reqs; + struct hwc_work_request reqs[]; +}; + +typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id, + const struct hwc_rx_oob *rx_oob); + +typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id, + const struct hwc_rx_oob *rx_oob); + +struct hwc_cq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_cq; + struct gdma_queue *gdma_eq; + struct gdma_comp *comp_buf; + u16 queue_depth; + + hwc_rx_event_handler_t *rx_event_handler; + void *rx_event_ctx; + + hwc_tx_event_handler_t *tx_event_handler; + void *tx_event_ctx; +}; + +struct hwc_wq { + struct hw_channel_context *hwc; + + struct gdma_queue *gdma_wq; + struct hwc_dma_buf *msg_buf; + u16 queue_depth; + + struct hwc_cq *hwc_cq; +}; + +struct hwc_caller_ctx { + struct completion comp_event; + void *output_buf; + u32 output_buflen; + + u32 error; /* Linux error code */ + u32 status_code; +}; + +struct hw_channel_context { + struct gdma_dev *gdma_dev; + struct device *dev; + + u16 num_inflight_msg; + u32 max_req_msg_size; + + u16 hwc_init_q_depth_max; + u32 hwc_init_max_req_msg_size; + u32 hwc_init_max_resp_msg_size; + + struct completion hwc_init_eqe_comp; + + struct hwc_wq *rxq; + struct hwc_wq *txq; + struct hwc_cq *cq; + + struct semaphore sema; + struct gdma_resource inflight_msg_res; + + u32 pf_dest_vrq_id; + u32 pf_dest_vrcq_id; + + struct hwc_caller_ctx *caller_ctx; +}; + +int mana_hwc_create_channel(struct gdma_context *gc); +void mana_hwc_destroy_channel(struct gdma_context *gc); + +int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, + const void *req, u32 resp_len, void *resp); + +#endif /* _HW_CHANNEL_H */ diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h new file mode 100644 index 000000000000..575ea36ce606 --- /dev/null +++ b/include/net/mana/mana.h @@ -0,0 +1,648 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _MANA_H +#define _MANA_H + +#include "gdma.h" +#include "hw_channel.h" + +/* Microsoft Azure Network Adapter (MANA)'s definitions + * + * Structures labeled with "HW DATA" are exchanged with the hardware. All of + * them are naturally aligned and hence don't need __packed. + */ + +/* MANA protocol version */ +#define MANA_MAJOR_VERSION 0 +#define MANA_MINOR_VERSION 1 +#define MANA_MICRO_VERSION 1 + +typedef u64 mana_handle_t; +#define INVALID_MANA_HANDLE ((mana_handle_t)-1) + +enum TRI_STATE { + TRI_STATE_UNKNOWN = -1, + TRI_STATE_FALSE = 0, + TRI_STATE_TRUE = 1 +}; + +/* Number of entries for hardware indirection table must be in power of 2 */ +#define MANA_INDIRECT_TABLE_SIZE 64 +#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1) + +/* The Toeplitz hash key's length in bytes: should be multiple of 8 */ +#define MANA_HASH_KEY_SIZE 40 + +#define COMP_ENTRY_SIZE 64 + +#define ADAPTER_MTU_SIZE 1500 +#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14) + +#define RX_BUFFERS_PER_QUEUE 512 + +#define MAX_SEND_BUFFERS_PER_QUEUE 256 + +#define EQ_SIZE (8 * PAGE_SIZE) +#define LOG2_EQ_THROTTLE 3 + +#define MAX_PORTS_IN_MANA_DEV 256 + +struct mana_stats_rx { + u64 packets; + u64 bytes; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_redirect; + struct u64_stats_sync syncp; +}; + +struct mana_stats_tx { + u64 packets; + u64 bytes; + u64 xdp_xmit; + struct u64_stats_sync syncp; +}; + +struct mana_txq { + struct gdma_queue *gdma_sq; + + union { + u32 gdma_txq_id; + struct { + u32 reserved1 : 10; + u32 vsq_frame : 14; + u32 reserved2 : 8; + }; + }; + + u16 vp_offset; + + struct net_device *ndev; + + /* The SKBs are sent to the HW and we are waiting for the CQEs. */ + struct sk_buff_head pending_skbs; + struct netdev_queue *net_txq; + + atomic_t pending_sends; + + struct mana_stats_tx stats; +}; + +/* skb data and frags dma mappings */ +struct mana_skb_head { + dma_addr_t dma_handle[MAX_SKB_FRAGS + 1]; + + u32 size[MAX_SKB_FRAGS + 1]; +}; + +#define MANA_HEADROOM sizeof(struct mana_skb_head) + +enum mana_tx_pkt_format { + MANA_SHORT_PKT_FMT = 0, + MANA_LONG_PKT_FMT = 1, +}; + +struct mana_tx_short_oob { + u32 pkt_fmt : 2; + u32 is_outer_ipv4 : 1; + u32 is_outer_ipv6 : 1; + u32 comp_iphdr_csum : 1; + u32 comp_tcp_csum : 1; + u32 comp_udp_csum : 1; + u32 supress_txcqe_gen : 1; + u32 vcq_num : 24; + + u32 trans_off : 10; /* Transport header offset */ + u32 vsq_frame : 14; + u32 short_vp_offset : 8; +}; /* HW DATA */ + +struct mana_tx_long_oob { + u32 is_encap : 1; + u32 inner_is_ipv6 : 1; + u32 inner_tcp_opt : 1; + u32 inject_vlan_pri_tag : 1; + u32 reserved1 : 12; + u32 pcp : 3; /* 802.1Q */ + u32 dei : 1; /* 802.1Q */ + u32 vlan_id : 12; /* 802.1Q */ + + u32 inner_frame_offset : 10; + u32 inner_ip_rel_offset : 6; + u32 long_vp_offset : 12; + u32 reserved2 : 4; + + u32 reserved3; + u32 reserved4; +}; /* HW DATA */ + +struct mana_tx_oob { + struct mana_tx_short_oob s_oob; + struct mana_tx_long_oob l_oob; +}; /* HW DATA */ + +enum mana_cq_type { + MANA_CQ_TYPE_RX, + MANA_CQ_TYPE_TX, +}; + +enum mana_cqe_type { + CQE_INVALID = 0, + CQE_RX_OKAY = 1, + CQE_RX_COALESCED_4 = 2, + CQE_RX_OBJECT_FENCE = 3, + CQE_RX_TRUNCATED = 4, + + CQE_TX_OKAY = 32, + CQE_TX_SA_DROP = 33, + CQE_TX_MTU_DROP = 34, + CQE_TX_INVALID_OOB = 35, + CQE_TX_INVALID_ETH_TYPE = 36, + CQE_TX_HDR_PROCESSING_ERROR = 37, + CQE_TX_VF_DISABLED = 38, + CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39, + CQE_TX_VPORT_DISABLED = 40, + CQE_TX_VLAN_TAGGING_VIOLATION = 41, +}; + +#define MANA_CQE_COMPLETION 1 + +struct mana_cqe_header { + u32 cqe_type : 6; + u32 client_type : 2; + u32 vendor_err : 24; +}; /* HW DATA */ + +/* NDIS HASH Types */ +#define NDIS_HASH_IPV4 BIT(0) +#define NDIS_HASH_TCP_IPV4 BIT(1) +#define NDIS_HASH_UDP_IPV4 BIT(2) +#define NDIS_HASH_IPV6 BIT(3) +#define NDIS_HASH_TCP_IPV6 BIT(4) +#define NDIS_HASH_UDP_IPV6 BIT(5) +#define NDIS_HASH_IPV6_EX BIT(6) +#define NDIS_HASH_TCP_IPV6_EX BIT(7) +#define NDIS_HASH_UDP_IPV6_EX BIT(8) + +#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX) +#define MANA_HASH_L4 \ + (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \ + NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX) + +struct mana_rxcomp_perpkt_info { + u32 pkt_len : 16; + u32 reserved1 : 16; + u32 reserved2; + u32 pkt_hash; +}; /* HW DATA */ + +#define MANA_RXCOMP_OOB_NUM_PPI 4 + +/* Receive completion OOB */ +struct mana_rxcomp_oob { + struct mana_cqe_header cqe_hdr; + + u32 rx_vlan_id : 12; + u32 rx_vlantag_present : 1; + u32 rx_outer_iphdr_csum_succeed : 1; + u32 rx_outer_iphdr_csum_fail : 1; + u32 reserved1 : 1; + u32 rx_hashtype : 9; + u32 rx_iphdr_csum_succeed : 1; + u32 rx_iphdr_csum_fail : 1; + u32 rx_tcp_csum_succeed : 1; + u32 rx_tcp_csum_fail : 1; + u32 rx_udp_csum_succeed : 1; + u32 rx_udp_csum_fail : 1; + u32 reserved2 : 1; + + struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI]; + + u32 rx_wqe_offset; +}; /* HW DATA */ + +struct mana_tx_comp_oob { + struct mana_cqe_header cqe_hdr; + + u32 tx_data_offset; + + u32 tx_sgl_offset : 5; + u32 tx_wqe_offset : 27; + + u32 reserved[12]; +}; /* HW DATA */ + +struct mana_rxq; + +#define CQE_POLLING_BUFFER 512 + +struct mana_cq { + struct gdma_queue *gdma_cq; + + /* Cache the CQ id (used to verify if each CQE comes to the right CQ. */ + u32 gdma_id; + + /* Type of the CQ: TX or RX */ + enum mana_cq_type type; + + /* Pointer to the mana_rxq that is pushing RX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_RX. + */ + struct mana_rxq *rxq; + + /* Pointer to the mana_txq that is pushing TX CQEs to the queue. + * Only and must be non-NULL if type is MANA_CQ_TYPE_TX. + */ + struct mana_txq *txq; + + /* Buffer which the CQ handler can copy the CQE's into. */ + struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER]; + + /* NAPI data */ + struct napi_struct napi; + int work_done; + int budget; +}; + +struct mana_recv_buf_oob { + /* A valid GDMA work request representing the data buffer. */ + struct gdma_wqe_request wqe_req; + + void *buf_va; + dma_addr_t buf_dma_addr; + + /* SGL of the buffer going to be sent has part of the work request. */ + u32 num_sge; + struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES]; + + /* Required to store the result of mana_gd_post_work_request. + * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the + * work queue when the WQE is consumed. + */ + struct gdma_posted_wqe_info wqe_inf; +}; + +struct mana_rxq { + struct gdma_queue *gdma_rq; + /* Cache the gdma receive queue id */ + u32 gdma_id; + + /* Index of RQ in the vPort, not gdma receive queue id */ + u32 rxq_idx; + + u32 datasize; + + mana_handle_t rxobj; + + struct mana_cq rx_cq; + + struct completion fence_event; + + struct net_device *ndev; + + /* Total number of receive buffers to be allocated */ + u32 num_rx_buf; + + u32 buf_index; + + struct mana_stats_rx stats; + + struct bpf_prog __rcu *bpf_prog; + struct xdp_rxq_info xdp_rxq; + struct page *xdp_save_page; + bool xdp_flush; + int xdp_rc; /* XDP redirect return code */ + + /* MUST BE THE LAST MEMBER: + * Each receive buffer has an associated mana_recv_buf_oob. + */ + struct mana_recv_buf_oob rx_oobs[]; +}; + +struct mana_tx_qp { + struct mana_txq txq; + + struct mana_cq tx_cq; + + mana_handle_t tx_object; +}; + +struct mana_ethtool_stats { + u64 stop_queue; + u64 wake_queue; +}; + +struct mana_context { + struct gdma_dev *gdma_dev; + + u16 num_ports; + + struct mana_eq *eqs; + + struct net_device *ports[MAX_PORTS_IN_MANA_DEV]; +}; + +struct mana_port_context { + struct mana_context *ac; + struct net_device *ndev; + + u8 mac_addr[ETH_ALEN]; + + enum TRI_STATE rss_state; + + mana_handle_t default_rxobj; + bool tx_shortform_allowed; + u16 tx_vp_offset; + + struct mana_tx_qp *tx_qp; + + /* Indirection Table for RX & TX. The values are queue indexes */ + u32 indir_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Indirection table containing RxObject Handles */ + mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE]; + + /* Hash key used by the NIC */ + u8 hashkey[MANA_HASH_KEY_SIZE]; + + /* This points to an array of num_queues of RQ pointers. */ + struct mana_rxq **rxqs; + + struct bpf_prog *bpf_prog; + + /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */ + unsigned int max_queues; + unsigned int num_queues; + + mana_handle_t port_handle; + mana_handle_t pf_filter_handle; + + /* Mutex for sharing access to vport_use_count */ + struct mutex vport_mutex; + int vport_use_count; + + u16 port_idx; + + bool port_is_up; + bool port_st_save; /* Saved port state */ + + struct mana_ethtool_stats eth_stats; +}; + +netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev); +int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx, + bool update_hash, bool update_tab); + +int mana_alloc_queues(struct net_device *ndev); +int mana_attach(struct net_device *ndev); +int mana_detach(struct net_device *ndev, bool from_close); + +int mana_probe(struct gdma_dev *gd, bool resuming); +void mana_remove(struct gdma_dev *gd, bool suspending); + +void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev); +int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames, + u32 flags); +u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq, + struct xdp_buff *xdp, void *buf_va, uint pkt_len); +struct bpf_prog *mana_xdp_get(struct mana_port_context *apc); +void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog); +int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf); + +extern const struct ethtool_ops mana_ethtool_ops; + +struct mana_obj_spec { + u32 queue_index; + u64 gdma_region; + u32 queue_size; + u32 attached_eq; + u32 modr_ctx_id; +}; + +enum mana_command_code { + MANA_QUERY_DEV_CONFIG = 0x20001, + MANA_QUERY_GF_STAT = 0x20002, + MANA_CONFIG_VPORT_TX = 0x20003, + MANA_CREATE_WQ_OBJ = 0x20004, + MANA_DESTROY_WQ_OBJ = 0x20005, + MANA_FENCE_RQ = 0x20006, + MANA_CONFIG_VPORT_RX = 0x20007, + MANA_QUERY_VPORT_CONFIG = 0x20008, + + /* Privileged commands for the PF mode */ + MANA_REGISTER_FILTER = 0x28000, + MANA_DEREGISTER_FILTER = 0x28001, + MANA_REGISTER_HW_PORT = 0x28003, + MANA_DEREGISTER_HW_PORT = 0x28004, +}; + +/* Query Device Configuration */ +struct mana_query_device_cfg_req { + struct gdma_req_hdr hdr; + + /* MANA Nic Driver Capability flags */ + u64 mn_drv_cap_flags1; + u64 mn_drv_cap_flags2; + u64 mn_drv_cap_flags3; + u64 mn_drv_cap_flags4; + + u32 proto_major_ver; + u32 proto_minor_ver; + u32 proto_micro_ver; + + u32 reserved; +}; /* HW DATA */ + +struct mana_query_device_cfg_resp { + struct gdma_resp_hdr hdr; + + u64 pf_cap_flags1; + u64 pf_cap_flags2; + u64 pf_cap_flags3; + u64 pf_cap_flags4; + + u16 max_num_vports; + u16 reserved; + u32 max_num_eqs; +}; /* HW DATA */ + +/* Query vPort Configuration */ +struct mana_query_vport_cfg_req { + struct gdma_req_hdr hdr; + u32 vport_index; +}; /* HW DATA */ + +struct mana_query_vport_cfg_resp { + struct gdma_resp_hdr hdr; + u32 max_num_sq; + u32 max_num_rq; + u32 num_indirection_ent; + u32 reserved1; + u8 mac_addr[6]; + u8 reserved2[2]; + mana_handle_t vport; +}; /* HW DATA */ + +/* Configure vPort */ +struct mana_config_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 pdid; + u32 doorbell_pageid; +}; /* HW DATA */ + +struct mana_config_vport_resp { + struct gdma_resp_hdr hdr; + u16 tx_vport_offset; + u8 short_form_allowed; + u8 reserved; +}; /* HW DATA */ + +/* Create WQ Object */ +struct mana_create_wqobj_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u32 wq_type; + u32 reserved; + u64 wq_gdma_region; + u64 cq_gdma_region; + u32 wq_size; + u32 cq_size; + u32 cq_moderation_ctx_id; + u32 cq_parent_qid; +}; /* HW DATA */ + +struct mana_create_wqobj_resp { + struct gdma_resp_hdr hdr; + u32 wq_id; + u32 cq_id; + mana_handle_t wq_obj; +}; /* HW DATA */ + +/* Destroy WQ Object */ +struct mana_destroy_wqobj_req { + struct gdma_req_hdr hdr; + u32 wq_type; + u32 reserved; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_destroy_wqobj_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Fence RQ */ +struct mana_fence_rq_req { + struct gdma_req_hdr hdr; + mana_handle_t wq_obj_handle; +}; /* HW DATA */ + +struct mana_fence_rq_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Configure vPort Rx Steering */ +struct mana_cfg_rx_steer_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u16 num_indir_entries; + u16 indir_tab_offset; + u32 rx_enable; + u32 rss_enable; + u8 update_default_rxobj; + u8 update_hashkey; + u8 update_indir_tab; + u8 reserved; + mana_handle_t default_rxobj; + u8 hashkey[MANA_HASH_KEY_SIZE]; +}; /* HW DATA */ + +struct mana_cfg_rx_steer_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Register HW vPort */ +struct mana_register_hw_vport_req { + struct gdma_req_hdr hdr; + u16 attached_gfid; + u8 is_pf_default_vport; + u8 reserved1; + u8 allow_all_ether_types; + u8 reserved2; + u8 reserved3; + u8 reserved4; +}; /* HW DATA */ + +struct mana_register_hw_vport_resp { + struct gdma_resp_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +/* Deregister HW vPort */ +struct mana_deregister_hw_vport_req { + struct gdma_req_hdr hdr; + mana_handle_t hw_vport_handle; +}; /* HW DATA */ + +struct mana_deregister_hw_vport_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +/* Register filter */ +struct mana_register_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t vport; + u8 mac_addr[6]; + u8 reserved1; + u8 reserved2; + u8 reserved3; + u8 reserved4; + u16 reserved5; + u32 reserved6; + u32 reserved7; + u32 reserved8; +}; /* HW DATA */ + +struct mana_register_filter_resp { + struct gdma_resp_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +/* Deregister filter */ +struct mana_deregister_filter_req { + struct gdma_req_hdr hdr; + mana_handle_t filter_handle; +}; /* HW DATA */ + +struct mana_deregister_filter_resp { + struct gdma_resp_hdr hdr; +}; /* HW DATA */ + +#define MANA_MAX_NUM_QUEUES 64 + +#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1) + +struct mana_tx_package { + struct gdma_wqe_request wqe_req; + struct gdma_sge sgl_array[5]; + struct gdma_sge *sgl_ptr; + + struct mana_tx_oob tx_oob; + + struct gdma_posted_wqe_info wqe_info; +}; + +int mana_create_wq_obj(struct mana_port_context *apc, + mana_handle_t vport, + u32 wq_type, struct mana_obj_spec *wq_spec, + struct mana_obj_spec *cq_spec, + mana_handle_t *wq_obj); + +void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type, + mana_handle_t wq_obj); + +int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id, + u32 doorbell_pg_id); +void mana_uncfg_vport(struct mana_port_context *apc); +#endif /* _MANA_H */ diff --git a/include/net/mana/mana_auxiliary.h b/include/net/mana/mana_auxiliary.h new file mode 100644 index 000000000000..373d59756846 --- /dev/null +++ b/include/net/mana/mana_auxiliary.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (c) 2022, Microsoft Corporation. */ + +#include "mana.h" +#include <linux/auxiliary_bus.h> + +struct mana_adev { + struct auxiliary_device adev; + struct gdma_dev *mdev; +}; diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h new file mode 100644 index 000000000000..5199b41497ff --- /dev/null +++ b/include/net/mana/shm_channel.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */ +/* Copyright (c) 2021, Microsoft Corporation. */ + +#ifndef _SHM_CHANNEL_H +#define _SHM_CHANNEL_H + +struct shm_channel { + struct device *dev; + void __iomem *base; +}; + +void mana_smc_init(struct shm_channel *sc, struct device *dev, + void __iomem *base); + +int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr, + u64 cq_addr, u64 rq_addr, u64 sq_addr, + u32 eq_msix_index); + +int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf); + +#endif /* _SHM_CHANNEL_H */ diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 412479ebf5ad..3c5c68618fcc 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -97,8 +97,6 @@ struct mptcp_out_options { }; #ifdef CONFIG_MPTCP -extern struct request_sock_ops mptcp_subflow_request_sock_ops; - void mptcp_init(void); static inline bool sk_is_mptcp(const struct sock *sk) @@ -188,6 +186,9 @@ void mptcp_seq_show(struct seq_file *seq); int mptcp_subflow_init_cookie_req(struct request_sock *req, const struct sock *sk_listener, struct sk_buff *skb); +struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener); __be32 mptcp_get_reset_option(const struct sk_buff *skb); @@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req, return 0; /* TCP fallback */ } +static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops, + struct sock *sk_listener, + bool attach_listener) +{ + return NULL; +} + static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); } #endif /* CONFIG_MPTCP */ diff --git a/include/net/mrp.h b/include/net/mrp.h index 92cd3fb6cf9d..b28915ffea28 100644 --- a/include/net/mrp.h +++ b/include/net/mrp.h @@ -124,6 +124,7 @@ struct mrp_applicant { struct sk_buff *pdu; struct rb_root mad; struct rcu_head rcu; + bool active; }; struct mrp_port { diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 8c3587d5c308..78beaa765c73 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -92,7 +92,9 @@ struct net { struct ns_common ns; struct ref_tracker_dir refcnt_tracker; - + struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not + * refcounted against netns + */ struct list_head dev_base_head; struct proc_dir_entry *proc_net; struct proc_dir_entry *proc_net_stat; @@ -320,19 +322,31 @@ static inline int check_net(const struct net *net) #endif -static inline void netns_tracker_alloc(struct net *net, - netns_tracker *tracker, gfp_t gfp) +static inline void __netns_tracker_alloc(struct net *net, + netns_tracker *tracker, + bool refcounted, + gfp_t gfp) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp); + ref_tracker_alloc(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, + tracker, gfp); #endif } -static inline void netns_tracker_free(struct net *net, - netns_tracker *tracker) +static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker, + gfp_t gfp) +{ + __netns_tracker_alloc(net, tracker, true, gfp); +} + +static inline void __netns_tracker_free(struct net *net, + netns_tracker *tracker, + bool refcounted) { #ifdef CONFIG_NET_NS_REFCNT_TRACKER - ref_tracker_free(&net->refcnt_tracker, tracker); + ref_tracker_free(refcounted ? &net->refcnt_tracker : + &net->notrefcnt_tracker, tracker); #endif } @@ -346,7 +360,7 @@ static inline struct net *get_net_track(struct net *net, static inline void put_net_track(struct net *net, netns_tracker *tracker) { - netns_tracker_free(net, tracker); + __netns_tracker_free(net, tracker, true); put_net(net); } diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index b2b9de70d9f4..71d1269fe4d4 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -71,8 +71,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb) return ret; } -unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff, - struct nf_conn *ct, enum ip_conntrack_info ctinfo); +unsigned int nf_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l4proto *proto); diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h index 9939c366f720..f30b1694b690 100644 --- a/include/net/netfilter/nf_conntrack_helper.h +++ b/include/net/netfilter/nf_conntrack_helper.h @@ -115,6 +115,11 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp); int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags); +int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, u16 proto); +int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family, + u8 proto, bool nat, struct nf_conntrack_helper **hp); + void nf_ct_helper_destroy(struct nf_conn *ct); static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct) diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index e9eb01e99d2f..9877f064548a 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -104,6 +104,10 @@ unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int *action, + const struct nf_nat_range2 *range, bool commit); + static inline int nf_nat_initialized(const struct nf_conn *ct, enum nf_nat_manip_type manip) { diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cdb7db9b0e25..e69ce23566ea 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -24,6 +24,7 @@ struct module; enum { NFT_PKTINFO_L4PROTO = (1 << 0), NFT_PKTINFO_INNER = (1 << 1), + NFT_PKTINFO_INNER_FULL = (1 << 2), }; struct nft_pktinfo { @@ -32,8 +33,8 @@ struct nft_pktinfo { u8 flags; u8 tprot; u16 fragoff; - unsigned int thoff; - unsigned int inneroff; + u16 thoff; + u16 inneroff; }; static inline struct sock *nft_sk(const struct nft_pktinfo *pkt) @@ -375,10 +376,14 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) return (void *)expr->data; } +struct nft_expr_info; + +int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla, + struct nft_expr_info *info); int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src); void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr); int nft_expr_dump(struct sk_buff *skb, unsigned int attr, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); bool nft_expr_reduce_bitwise(struct nft_regs_track *track, const struct nft_expr *expr); @@ -864,6 +869,7 @@ struct nft_expr_type { const struct nlattr * const tb[]); void (*release_ops)(const struct nft_expr_ops *ops); const struct nft_expr_ops *ops; + const struct nft_expr_ops *inner_ops; struct list_head list; const char *name; struct module *owner; @@ -921,7 +927,8 @@ struct nft_expr_ops { void (*destroy_clone)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, + bool reset); int (*validate)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data); diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index 1223af68cd9a..3e825381ac5c 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -18,6 +18,8 @@ extern struct nft_expr_type nft_meta_type; extern struct nft_expr_type nft_rt_type; extern struct nft_expr_type nft_exthdr_type; extern struct nft_expr_type nft_last_type; +extern struct nft_expr_type nft_objref_type; +extern struct nft_expr_type nft_inner_type; #ifdef CONFIG_NETWORK_SECMARK extern struct nft_object_type nft_secmark_obj_type; @@ -66,16 +68,6 @@ struct nft_payload { u8 dreg; }; -struct nft_payload_set { - enum nft_payload_bases base:8; - u8 offset; - u8 len; - u8 sreg; - u8 csum_type; - u8 csum_offset; - u8 csum_flags; -}; - extern const struct nft_expr_ops nft_payload_fast_ops; extern const struct nft_expr_ops nft_bitwise_fast_ops; @@ -148,4 +140,28 @@ void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt); + +enum { + NFT_PAYLOAD_CTX_INNER_TUN = (1 << 0), + NFT_PAYLOAD_CTX_INNER_LL = (1 << 1), + NFT_PAYLOAD_CTX_INNER_NH = (1 << 2), + NFT_PAYLOAD_CTX_INNER_TH = (1 << 3), +}; + +struct nft_inner_tun_ctx { + u16 type; + u16 inner_tunoff; + u16 inner_lloff; + u16 inner_nhoff; + u16 inner_thoff; + __be16 llproto; + u8 l4proto; + u8 flags; +}; + +int nft_payload_inner_offset(const struct nft_pktinfo *pkt); +void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs, + const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *ctx); + #endif /* _NET_NF_TABLES_CORE_H */ diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index c4a6147b0ef8..112708f7a6b4 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -35,6 +35,8 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt) return -1; else if (len < thoff) return -1; + else if (thoff < sizeof(*iph)) + return -1; pkt->flags = NFT_PKTINFO_L4PROTO; pkt->tprot = iph->protocol; @@ -69,6 +71,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt) return -1; } else if (len < thoff) { goto inhdr_error; + } else if (thoff < sizeof(*iph)) { + return -1; } pkt->flags = NFT_PKTINFO_L4PROTO; diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index ec7eaeaf4f04..467d59b9e533 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -13,7 +13,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt) unsigned short frag_off; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) { + if (protohdr < 0 || thoff > U16_MAX) { nft_set_pktinfo_unspec(pkt); return; } @@ -47,7 +47,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt) return -1; protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) + if (protohdr < 0 || thoff > U16_MAX) return -1; pkt->flags = NFT_PKTINFO_L4PROTO; @@ -93,7 +93,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt) } protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags); - if (protohdr < 0) + if (protohdr < 0 || thoff > U16_MAX) goto inhdr_error; pkt->flags = NFT_PKTINFO_L4PROTO; diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h index eed099eae672..167640b843ef 100644 --- a/include/net/netfilter/nft_fib.h +++ b/include/net/netfilter/nft_fib.h @@ -18,7 +18,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in) return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK; } -int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset); int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h index 9b51cc67de54..ba1238f12a48 100644 --- a/include/net/netfilter/nft_meta.h +++ b/include/net/netfilter/nft_meta.h @@ -24,10 +24,10 @@ int nft_meta_set_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]); int nft_meta_get_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); int nft_meta_set_dump(struct sk_buff *skb, - const struct nft_expr *expr); + const struct nft_expr *expr, bool reset); void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -46,4 +46,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx, bool nft_meta_get_reduce(struct nft_regs_track *track, const struct nft_expr *expr); + +struct nft_inner_tun_ctx; +void nft_meta_inner_eval(const struct nft_expr *expr, + struct nft_regs *regs, const struct nft_pktinfo *pkt, + struct nft_inner_tun_ctx *tun_ctx); + #endif diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h index 56b123a42220..6d9ba62efd75 100644 --- a/include/net/netfilter/nft_reject.h +++ b/include/net/netfilter/nft_reject.h @@ -22,7 +22,8 @@ int nft_reject_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); -int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr); +int nft_reject_dump(struct sk_buff *skb, + const struct nft_expr *expr, bool reset); int nft_reject_icmp_code(u8 code); int nft_reject_icmpv6_code(u8 code); diff --git a/include/net/netlink.h b/include/net/netlink.h index 6bfa972f2fbf..6e1e670e06bc 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -906,6 +906,17 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) } /** + * nlmsg_seq - return the seq number of netlink message + * @nlh: netlink message header + * + * Returns 0 if netlink message is NULL + */ +static inline u32 nlmsg_seq(const struct nlmsghdr *nlh) +{ + return nlh ? nlh->nlmsg_seq : 0; +} + +/** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute * @nlh: netlink message header @@ -938,6 +949,27 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se } /** + * nlmsg_append - Add more data to a nlmsg in a skb + * @skb: socket buffer to store message in + * @size: length of message payload + * + * Append data to an existing nlmsg, used when constructing a message + * with multiple fixed-format headers (which is rare). + * Returns NULL if the tailroom of the skb is insufficient to store + * the extra payload. + */ +static inline void *nlmsg_append(struct sk_buff *skb, u32 size) +{ + if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size))) + return NULL; + + if (NLMSG_ALIGN(size) - size) + memset(skb_tail_pointer(skb) + size, 0, + NLMSG_ALIGN(size) - size); + return __skb_put(skb, NLMSG_ALIGN(size)); +} + +/** * nlmsg_put_answer - Add a new callback based netlink message to an skb * @skb: socket buffer to store message in * @cb: netlink callback diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 1b8004679445..db762e35aca9 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -43,6 +43,7 @@ struct tcp_fastopen_context; struct netns_ipv4 { struct inet_timewait_death_row tcp_death_row; + struct udp_table *udp_table; #ifdef CONFIG_SYSCTL struct ctl_table_header *forw_hdr; @@ -183,6 +184,11 @@ struct netns_ipv4 { unsigned long tfo_active_disable_stamp; u32 tcp_challenge_timestamp; u32 tcp_challenge_count; + u8 sysctl_tcp_plb_enabled; + u8 sysctl_tcp_plb_idle_rehash_rounds; + u8 sysctl_tcp_plb_rehash_rounds; + u8 sysctl_tcp_plb_suspend_rto_sec; + int sysctl_tcp_plb_cong_thresh; int sysctl_udp_wmem_min; int sysctl_udp_rmem_min; @@ -202,6 +208,8 @@ struct netns_ipv4 { atomic_t dev_addr_genid; + unsigned int sysctl_udp_child_hash_entries; + #ifdef CONFIG_SYSCTL unsigned long *sysctl_local_reserved_ports; int sysctl_ip_prot_sock; diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index a681147aecd8..7eff3d981b89 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -175,6 +175,10 @@ struct netns_sctp { /* Threshold for autoclose timeout, in seconds. */ unsigned long max_autoclose; + +#ifdef CONFIG_NET_L3_MASTER_DEV + int l3mdev_accept; +#endif }; #endif /* __NETNS_SCTP_H__ */ diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h index e5734261ba0a..21a4f25a187a 100644 --- a/include/net/netns/xdp.h +++ b/include/net/netns/xdp.h @@ -2,8 +2,8 @@ #ifndef __NETNS_XDP_H__ #define __NETNS_XDP_H__ -#include <linux/rculist.h> #include <linux/mutex.h> +#include <linux/types.h> struct netns_xdp { struct mutex lock; diff --git a/include/net/nl802154.h b/include/net/nl802154.h index f5850b569c52..b79a89d5207c 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -72,6 +72,8 @@ enum nl802154_commands { NL802154_CMD_NEW_SEC_LEVEL, NL802154_CMD_DEL_SEC_LEVEL, + NL802154_CMD_SCAN_EVENT, + /* add new commands above here */ /* used to define NL802154_CMD_MAX below */ @@ -131,6 +133,8 @@ enum nl802154_attrs { NL802154_ATTR_PID, NL802154_ATTR_NETNS_FD, + NL802154_ATTR_COORDINATOR, + /* add attributes here, update the policy in nl802154.c */ #ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL @@ -217,6 +221,45 @@ enum nl802154_wpan_phy_capability_attr { }; /** + * enum nl802154_coord - Netlink attributes for a coord + * + * @__NL802154_COORD_INVALID: invalid + * @NL802154_COORD_PANID: PANID of the coordinator (2 bytes) + * @NL802154_COORD_ADDR: coordinator address, (8 bytes or 2 bytes) + * @NL802154_COORD_CHANNEL: channel number, related to @NL802154_COORD_PAGE (u8) + * @NL802154_COORD_PAGE: channel page, related to @NL802154_COORD_CHANNEL (u8) + * @NL802154_COORD_PREAMBLE_CODE: Preamble code used when the beacon was received, + * this is PHY dependent and optional (u8) + * @NL802154_COORD_MEAN_PRF: Mean PRF used when the beacon was received, + * this is PHY dependent and optional (u8) + * @NL802154_COORD_SUPERFRAME_SPEC: superframe specification of the PAN (u16) + * @NL802154_COORD_LINK_QUALITY: signal quality of beacon in unspecified units, + * scaled to 0..255 (u8) + * @NL802154_COORD_GTS_PERMIT: set to true if GTS is permitted on this PAN + * @NL802154_COORD_PAYLOAD_DATA: binary data containing the raw data from the + * frame payload, (only if beacon or probe response had data) + * @NL802154_COORD_PAD: attribute used for padding for 64-bit alignment + * @NL802154_COORD_MAX: highest coordinator attribute + */ +enum nl802154_coord { + __NL802154_COORD_INVALID, + NL802154_COORD_PANID, + NL802154_COORD_ADDR, + NL802154_COORD_CHANNEL, + NL802154_COORD_PAGE, + NL802154_COORD_PREAMBLE_CODE, + NL802154_COORD_MEAN_PRF, + NL802154_COORD_SUPERFRAME_SPEC, + NL802154_COORD_LINK_QUALITY, + NL802154_COORD_GTS_PERMIT, + NL802154_COORD_PAYLOAD_DATA, + NL802154_COORD_PAD, + + /* keep last */ + NL802154_COORD_MAX, +}; + +/** * enum nl802154_cca_modes - cca modes * * @__NL802154_CCA_INVALID: cca mode number 0 is reserved diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index bf8bb3357825..d9076a7a430c 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -186,8 +186,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname, const struct rtnl_link_ops *ops, struct nlattr *tb[], struct netlink_ext_ack *extack); -int rtnl_delete_link(struct net_device *dev); -int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm); +int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh); +int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm, + u32 portid, const struct nlmsghdr *nlh); int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len, struct netlink_ext_ack *exterr); diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h index 5a9bb09f32b6..f514a0aa849e 100644 --- a/include/net/sctp/checksum.h +++ b/include/net/sctp/checksum.h @@ -24,7 +24,7 @@ #define __sctp_checksum_h__ #include <linux/types.h> -#include <net/sctp/sctp.h> +#include <linux/sctp.h> #include <linux/crc32c.h> #include <linux/crc32.h> diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index a04999ee99b0..c335dd01a597 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -67,11 +67,6 @@ #define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT #endif -/* Round an int up to the next multiple of 4. */ -#define SCTP_PAD4(s) (((s)+3)&~3) -/* Truncate to the previous multiple of 4. */ -#define SCTP_TRUNC4(s) ((s)&~3) - /* * Function declarations. */ @@ -114,7 +109,7 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net, struct rhashtable_iter *iter, int pos); int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr, void *p); + const union sctp_addr *paddr, void *p, int dif); int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done, struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); @@ -162,10 +157,12 @@ void sctp_unhash_transport(struct sctp_transport *t); struct sctp_transport *sctp_addrs_lookup_transport( struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, + int dif, int sdif); struct sctp_transport *sctp_epaddr_lookup_transport( const struct sctp_endpoint *ep, const union sctp_addr *paddr); +bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif); /* * sctp/proc.c diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h index 65058faea4db..fa00dc20a0d7 100644 --- a/include/net/sctp/stream_sched.h +++ b/include/net/sctp/stream_sched.h @@ -28,8 +28,6 @@ struct sctp_sched_ops { int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp); /* free a stream */ void (*free_sid)(struct sctp_stream *stream, __u16 sid); - /* Frees the entire thing */ - void (*free)(struct sctp_stream *stream); /* Enqueue a chunk */ void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg); diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 350f250b0dc7..afa3781e3ca2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -477,6 +477,7 @@ struct sctp_af { int (*available) (union sctp_addr *, struct sctp_sock *); int (*skb_iif) (const struct sk_buff *sk); + int (*skb_sdif)(const struct sk_buff *sk); int (*is_ce) (const struct sk_buff *sk); void (*seq_dump_addr)(struct seq_file *seq, union sctp_addr *addr); @@ -1378,10 +1379,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc( struct sctp_transport **); bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep, const union sctp_addr *paddr); -struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *, - struct net *, const union sctp_addr *); +struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep, + struct net *net, + const union sctp_addr *laddr, + int dif, int sdif); bool sctp_has_association(struct net *net, const union sctp_addr *laddr, - const union sctp_addr *paddr); + const union sctp_addr *paddr, int dif, int sdif); int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h index 0eaf8650e3b2..60f6641290c3 100644 --- a/include/net/sctp/ulpqueue.h +++ b/include/net/sctp/ulpqueue.h @@ -35,8 +35,7 @@ struct sctp_ulpq { }; /* Prototypes. */ -struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *, - struct sctp_association *); +void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc); void sctp_ulpq_flush(struct sctp_ulpq *ulpq); void sctp_ulpq_free(struct sctp_ulpq *); diff --git a/include/net/sock.h b/include/net/sock.h index e0517ecc6531..ecea3dcc2217 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -503,10 +503,10 @@ struct sock { #if BITS_PER_LONG==32 seqlock_t sk_stamp_seq; #endif - u16 sk_tsflags; - u8 sk_shutdown; atomic_t sk_tskey; atomic_t sk_zckey; + u32 sk_tsflags; + u8 sk_shutdown; u8 sk_clockid; u8 sk_txtime_deadline_mode : 1, @@ -1899,7 +1899,7 @@ static inline void sock_replace_proto(struct sock *sk, struct proto *proto) struct sockcm_cookie { u64 transmit_time; u32 mark; - u16 tsflags; + u32 tsflags; }; static inline void sockcm_init(struct sockcm_cookie *sockc, @@ -1908,7 +1908,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc, *sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags }; } -int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg, +int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg, struct sockcm_cookie *sockc); int sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct sockcm_cookie *sockc); diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h index efc9085c6892..6ec140b0a61b 100644 --- a/include/net/sock_reuseport.h +++ b/include/net/sock_reuseport.h @@ -16,6 +16,7 @@ struct sock_reuseport { u16 max_socks; /* length of socks */ u16 num_socks; /* elements in socks */ u16 num_closed_socks; /* closed elements in socks */ + u16 incoming_cpu; /* The last synq overflow event timestamp of this * reuse->socks[] group. */ @@ -58,5 +59,6 @@ static inline bool reuseport_has_conns(struct sock *sk) } void reuseport_has_conns_set(struct sock *sk); +void reuseport_update_incoming_cpu(struct sock *sk, int val); #endif /* _SOCK_REUSEPORT_H */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7dcdc97c0bc3..ca0312b78294 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -248,6 +248,7 @@ struct switchdev_notifier_fdb_info { u16 vid; u8 added_by_user:1, is_local:1, + locked:1, offloaded:1; }; diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h index 8250d6f0a462..b24ea2d9400b 100644 --- a/include/net/tc_act/tc_ct.h +++ b/include/net/tc_act/tc_ct.h @@ -10,6 +10,7 @@ #include <net/netfilter/nf_conntrack_labels.h> struct tcf_ct_params { + struct nf_conntrack_helper *helper; struct nf_conn *tmpl; u16 zone; diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h index dc1079f28e13..9649600fb3dc 100644 --- a/include/net/tc_act/tc_skbedit.h +++ b/include/net/tc_act/tc_skbedit.h @@ -95,12 +95,41 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a) return priority; } +static inline u16 tcf_skbedit_rx_queue_mapping(const struct tc_action *a) +{ + u16 rx_queue; + + rcu_read_lock(); + rx_queue = rcu_dereference(to_skbedit(a)->params)->queue_mapping; + rcu_read_unlock(); + + return rx_queue; +} + /* Return true iff action is queue_mapping */ static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a) { return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING); } +/* Return true if action is on ingress traffic */ +static inline bool is_tcf_skbedit_ingress(u32 flags) +{ + return flags & TCA_ACT_FLAGS_AT_INGRESS; +} + +static inline bool is_tcf_skbedit_tx_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_queue_mapping(a) && + !is_tcf_skbedit_ingress(a->tcfa_flags); +} + +static inline bool is_tcf_skbedit_rx_queue_mapping(const struct tc_action *a) +{ + return is_tcf_skbedit_queue_mapping(a) && + is_tcf_skbedit_ingress(a->tcfa_flags); +} + /* Return true iff action is inheritdsfield */ static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a) { diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h new file mode 100644 index 000000000000..ceed2fc089ff --- /dev/null +++ b/include/net/tc_wrapper.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __NET_TC_WRAPPER_H +#define __NET_TC_WRAPPER_H + +#include <net/pkt_cls.h> + +#if IS_ENABLED(CONFIG_RETPOLINE) + +#include <linux/cpufeature.h> +#include <linux/static_key.h> +#include <linux/indirect_call_wrapper.h> + +#define TC_INDIRECT_SCOPE + +extern struct static_key_false tc_skip_wrapper; + +/* TC Actions */ +#ifdef CONFIG_NET_CLS_ACT + +#define TC_INDIRECT_ACTION_DECLARE(fname) \ + INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ + const struct tc_action *a, \ + struct tcf_result *res)) + +TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act); +TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act); +TC_INDIRECT_ACTION_DECLARE(tcf_csum_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ct_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act); +TC_INDIRECT_ACTION_DECLARE(tcf_gact_act); +TC_INDIRECT_ACTION_DECLARE(tcf_gate_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ife_act); +TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act); +TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act); +TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act); +TC_INDIRECT_ACTION_DECLARE(tcf_nat_act); +TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act); +TC_INDIRECT_ACTION_DECLARE(tcf_police_act); +TC_INDIRECT_ACTION_DECLARE(tcf_sample_act); +TC_INDIRECT_ACTION_DECLARE(tcf_simp_act); +TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act); +TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act); +TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act); +TC_INDIRECT_ACTION_DECLARE(tunnel_key_act); + +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + if (static_branch_likely(&tc_skip_wrapper)) + goto skip; + +#if IS_BUILTIN(CONFIG_NET_ACT_GACT) + if (a->ops->act == tcf_gact_act) + return tcf_gact_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED) + if (a->ops->act == tcf_mirred_act) + return tcf_mirred_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT) + if (a->ops->act == tcf_pedit_act) + return tcf_pedit_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT) + if (a->ops->act == tcf_skbedit_act) + return tcf_skbedit_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD) + if (a->ops->act == tcf_skbmod_act) + return tcf_skbmod_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_POLICE) + if (a->ops->act == tcf_police_act) + return tcf_police_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_BPF) + if (a->ops->act == tcf_bpf_act) + return tcf_bpf_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK) + if (a->ops->act == tcf_connmark_act) + return tcf_connmark_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CSUM) + if (a->ops->act == tcf_csum_act) + return tcf_csum_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CT) + if (a->ops->act == tcf_ct_act) + return tcf_ct_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO) + if (a->ops->act == tcf_ctinfo_act) + return tcf_ctinfo_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_GATE) + if (a->ops->act == tcf_gate_act) + return tcf_gate_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_MPLS) + if (a->ops->act == tcf_mpls_act) + return tcf_mpls_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_NAT) + if (a->ops->act == tcf_nat_act) + return tcf_nat_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY) + if (a->ops->act == tunnel_key_act) + return tunnel_key_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_VLAN) + if (a->ops->act == tcf_vlan_act) + return tcf_vlan_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_IFE) + if (a->ops->act == tcf_ife_act) + return tcf_ife_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_IPT) + if (a->ops->act == tcf_ipt_act) + return tcf_ipt_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SIMP) + if (a->ops->act == tcf_simp_act) + return tcf_simp_act(skb, a, res); +#endif +#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE) + if (a->ops->act == tcf_sample_act) + return tcf_sample_act(skb, a, res); +#endif + +skip: + return a->ops->act(skb, a, res); +} + +#endif /* CONFIG_NET_CLS_ACT */ + +/* TC Filters */ +#ifdef CONFIG_NET_CLS + +#define TC_INDIRECT_FILTER_DECLARE(fname) \ + INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \ + const struct tcf_proto *tp, \ + struct tcf_result *res)) + +TC_INDIRECT_FILTER_DECLARE(basic_classify); +TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify); +TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify); +TC_INDIRECT_FILTER_DECLARE(fl_classify); +TC_INDIRECT_FILTER_DECLARE(flow_classify); +TC_INDIRECT_FILTER_DECLARE(fw_classify); +TC_INDIRECT_FILTER_DECLARE(mall_classify); +TC_INDIRECT_FILTER_DECLARE(route4_classify); +TC_INDIRECT_FILTER_DECLARE(rsvp_classify); +TC_INDIRECT_FILTER_DECLARE(rsvp6_classify); +TC_INDIRECT_FILTER_DECLARE(tcindex_classify); +TC_INDIRECT_FILTER_DECLARE(u32_classify); + +static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + if (static_branch_likely(&tc_skip_wrapper)) + goto skip; + +#if IS_BUILTIN(CONFIG_NET_CLS_BPF) + if (tp->classify == cls_bpf_classify) + return cls_bpf_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_U32) + if (tp->classify == u32_classify) + return u32_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER) + if (tp->classify == fl_classify) + return fl_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FW) + if (tp->classify == fw_classify) + return fw_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL) + if (tp->classify == mall_classify) + return mall_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_BASIC) + if (tp->classify == basic_classify) + return basic_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP) + if (tp->classify == cls_cgroup_classify) + return cls_cgroup_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_FLOW) + if (tp->classify == flow_classify) + return flow_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4) + if (tp->classify == route4_classify) + return route4_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_RSVP) + if (tp->classify == rsvp_classify) + return rsvp_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6) + if (tp->classify == rsvp6_classify) + return rsvp6_classify(skb, tp, res); +#endif +#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX) + if (tp->classify == tcindex_classify) + return tcindex_classify(skb, tp, res); +#endif + +skip: + return tp->classify(skb, tp, res); +} + +static inline void tc_wrapper_init(void) +{ +#ifdef CONFIG_X86 + if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE)) + static_branch_enable(&tc_skip_wrapper); +#endif +} + +#endif /* CONFIG_NET_CLS */ + +#else + +#define TC_INDIRECT_SCOPE static + +static inline int tc_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + return a->ops->act(skb, a, res); +} + +static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) +{ + return tp->classify(skb, tp, res); +} + +static inline void tc_wrapper_init(void) +{ +} + +#endif + +#endif /* __NET_TC_WRAPPER_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 14d45661a84d..db9f828e9d1e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1675,7 +1675,11 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, const struct sock *sk, const struct sk_buff *skb); int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags, - const u8 *newkey, u8 newkeylen, gfp_t gfp); + const u8 *newkey, u8 newkeylen); +int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, + int family, u8 prefixlen, int l3index, + struct tcp_md5sig_key *key); + int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, @@ -1683,7 +1687,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, #ifdef CONFIG_TCP_MD5SIG #include <linux/jump_label.h> -extern struct static_key_false tcp_md5_needed; +extern struct static_key_false_deferred tcp_md5_needed; struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family); @@ -1691,7 +1695,7 @@ static inline struct tcp_md5sig_key * tcp_md5_do_lookup(const struct sock *sk, int l3index, const union tcp_md5_addr *addr, int family) { - if (!static_branch_unlikely(&tcp_md5_needed)) + if (!static_branch_unlikely(&tcp_md5_needed.key)) return NULL; return __tcp_md5_do_lookup(sk, l3index, addr, family); } @@ -2140,6 +2144,34 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq, extern void tcp_rack_reo_timeout(struct sock *sk); extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs); +/* tcp_plb.c */ + +/* + * Scaling factor for fractions in PLB. For example, tcp_plb_update_state + * expects cong_ratio which represents fraction of traffic that experienced + * congestion over a single RTT. In order to avoid floating point operations, + * this fraction should be mapped to (1 << TCP_PLB_SCALE) and passed in. + */ +#define TCP_PLB_SCALE 8 + +/* State for PLB (Protective Load Balancing) for a single TCP connection. */ +struct tcp_plb_state { + u8 consec_cong_rounds:5, /* consecutive congested rounds */ + unused:3; + u32 pause_until; /* jiffies32 when PLB can resume rerouting */ +}; + +static inline void tcp_plb_init(const struct sock *sk, + struct tcp_plb_state *plb) +{ + plb->consec_cong_rounds = 0; + plb->pause_until = 0; +} +void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb, + const int cong_ratio); +void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb); +void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb); + /* At how many usecs into the future should the RTO fire? */ static inline s64 tcp_rto_delta_us(const struct sock *sk) { @@ -2291,8 +2323,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore); void tcp_bpf_clone(const struct sock *sk, struct sock *newsk); #endif /* CONFIG_BPF_SYSCALL */ -int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes, - int flags); +int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress, + struct sk_msg *msg, u32 bytes, int flags); #endif /* CONFIG_NET_SOCK_MSG */ #if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG) diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index b830463e3dff..d27b1caf3753 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -58,8 +58,6 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp, #define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006) -void inet6_destroy_sock(struct sock *sk); - #define IPV6_SEQ_DGRAM_HEADER \ " sl " \ "local_address " \ diff --git a/include/net/tso.h b/include/net/tso.h index 62c98a9c60f1..e7e157ae0526 100644 --- a/include/net/tso.h +++ b/include/net/tso.h @@ -2,6 +2,7 @@ #ifndef _TSO_H #define _TSO_H +#include <linux/skbuff.h> #include <net/ip.h> #define TSO_HEADER_SIZE 256 @@ -16,7 +17,12 @@ struct tso_t { u32 tcp_seq; }; -int tso_count_descs(const struct sk_buff *skb); +/* Calculate the worst case buffer count */ +static inline int tso_count_descs(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags; +} + void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, int size, bool is_last); void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size); diff --git a/include/net/udp.h b/include/net/udp.h index fee053bcd17c..de4b528522bb 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *)); struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, netdev_features_t features, bool is_ipv6); +static inline void udp_lib_init_sock(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + skb_queue_head_init(&up->reader_queue); + up->forward_threshold = sk->sk_rcvbuf >> 2; + set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags); +} + /* hash routines shared between UDPv4/6 and UDP-Litev4/6 */ static inline int udp_lib_hash(struct sock *sk) { diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 72394f441dad..0ca9b7a11baf 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -68,8 +68,8 @@ typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, - struct sk_buff *skb, - unsigned int udp_offset); + struct sk_buff *skb, int err, + __be16 port, u32 info, u8 *payload); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dbc81f5eb553..3e1f70e8e424 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -129,6 +129,13 @@ struct xfrm_state_walk { enum { XFRM_DEV_OFFLOAD_IN = 1, XFRM_DEV_OFFLOAD_OUT, + XFRM_DEV_OFFLOAD_FWD, +}; + +enum { + XFRM_DEV_OFFLOAD_UNSPECIFIED, + XFRM_DEV_OFFLOAD_CRYPTO, + XFRM_DEV_OFFLOAD_PACKET, }; struct xfrm_dev_offload { @@ -137,6 +144,7 @@ struct xfrm_dev_offload { struct net_device *real_dev; unsigned long offload_handle; u8 dir : 2; + u8 type : 2; }; struct xfrm_mode { @@ -534,6 +542,8 @@ struct xfrm_policy { struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH]; struct hlist_node bydst_inexact_list; struct rcu_head rcu; + + struct xfrm_dev_offload xdo; }; static inline struct net *xp_net(const struct xfrm_policy *xp) @@ -1093,6 +1103,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un } #ifdef CONFIG_XFRM +static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) +{ + struct sec_path *sp = skb_sec_path(skb); + + return sp->xvec[sp->len - 1]; +} +#endif + +static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) +{ +#ifdef CONFIG_XFRM + struct sec_path *sp = skb_sec_path(skb); + + if (!sp || !sp->olen || sp->len != sp->olen) + return NULL; + + return &sp->ovec[sp->olen - 1]; +#else + return NULL; +#endif +} + +#ifdef CONFIG_XFRM int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family); @@ -1123,10 +1156,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir, { struct net *net = dev_net(skb->dev); int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0); + struct xfrm_offload *xo = xfrm_offload(skb); + struct xfrm_state *x; if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, ndir, skb, family); + if (xo) { + x = xfrm_input_state(skb); + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) + return (xo->flags & CRYPTO_DONE) && + (xo->status & CRYPTO_SUCCESS); + } + return __xfrm_check_nopolicy(net, skb, dir) || __xfrm_check_dev_nopolicy(skb, dir, family) || __xfrm_policy_check(sk, ndir, skb, family); @@ -1529,6 +1571,23 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id, struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi, unsigned short family); int xfrm_state_check_expire(struct xfrm_state *x); +#ifdef CONFIG_XFRM_OFFLOAD +static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) +{ + struct xfrm_dev_offload *xdo = &x->xso; + struct net_device *dev = xdo->dev; + + if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) + return; + + if (dev && dev->xfrmdev_ops && + dev->xfrmdev_ops->xdo_dev_state_update_curlft) + dev->xfrmdev_ops->xdo_dev_state_update_curlft(x); + +} +#else +static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {} +#endif void xfrm_state_insert(struct xfrm_state *x); int xfrm_state_add(struct xfrm_state *x); int xfrm_state_update(struct xfrm_state *x); @@ -1578,6 +1637,8 @@ struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_delete(struct xfrm_state *x); int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync); int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid); +int xfrm_dev_policy_flush(struct net *net, struct net_device *dev, + bool task_valid); void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq); @@ -1681,8 +1742,9 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, int xfrm_policy_flush(struct net *net, u8 type, bool task_valid); void xfrm_policy_hash_rebuild(struct net *net); u32 xfrm_get_acqseq(void); -int verify_spi_info(u8 proto, u32 min, u32 max); -int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); +int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack); +int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi, + struct netlink_ext_ack *extack); struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark, u8 mode, u32 reqid, u32 if_id, u8 proto, const xfrm_address_t *daddr, @@ -1703,7 +1765,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_bundles, struct xfrm_kmaddress *k, struct net *net, - struct xfrm_encap_tmpl *encap, u32 if_id); + struct xfrm_encap_tmpl *encap, u32 if_id, + struct netlink_ext_ack *extack); #endif int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); @@ -1858,29 +1921,6 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n) } #endif -#ifdef CONFIG_XFRM -static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb) -{ - struct sec_path *sp = skb_sec_path(skb); - - return sp->xvec[sp->len - 1]; -} -#endif - -static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb) -{ -#ifdef CONFIG_XFRM - struct sec_path *sp = skb_sec_path(skb); - - if (!sp || !sp->olen || sp->len != sp->olen) - return NULL; - - return &sp->ovec[sp->olen - 1]; -#else - return NULL; -#endif -} - void __init xfrm_dev_init(void); #ifdef CONFIG_XFRM_OFFLOAD @@ -1890,6 +1930,9 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, struct xfrm_user_offload *xuo, struct netlink_ext_ack *extack); +int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, + struct xfrm_user_offload *xuo, u8 dir, + struct netlink_ext_ack *extack); bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x); static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x) @@ -1938,6 +1981,28 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) netdev_put(dev, &xso->dev_tracker); } } + +static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) +{ + struct xfrm_dev_offload *xdo = &x->xdo; + struct net_device *dev = xdo->dev; + + if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_policy_delete) + dev->xfrmdev_ops->xdo_dev_policy_delete(x); +} + +static inline void xfrm_dev_policy_free(struct xfrm_policy *x) +{ + struct xfrm_dev_offload *xdo = &x->xdo; + struct net_device *dev = xdo->dev; + + if (dev && dev->xfrmdev_ops) { + if (dev->xfrmdev_ops->xdo_dev_policy_free) + dev->xfrmdev_ops->xdo_dev_policy_free(x); + xdo->dev = NULL; + netdev_put(dev, &xdo->dev_tracker); + } +} #else static inline void xfrm_dev_resume(struct sk_buff *skb) { @@ -1965,6 +2030,21 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x) { } +static inline int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp, + struct xfrm_user_offload *xuo, u8 dir, + struct netlink_ext_ack *extack) +{ + return 0; +} + +static inline void xfrm_dev_policy_delete(struct xfrm_policy *x) +{ +} + +static inline void xfrm_dev_policy_free(struct xfrm_policy *x) +{ +} + static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x) { return false; @@ -2084,4 +2164,21 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk) return false; } #endif + +#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \ + (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES)) + +extern struct metadata_dst __percpu *xfrm_bpf_md_dst; + +int register_xfrm_interface_bpf(void); + +#else + +static inline int register_xfrm_interface_bpf(void) +{ + return 0; +} + +#endif + #endif /* _NET_XFRM_H */ |