summaryrefslogtreecommitdiff
path: root/include/net
diff options
context:
space:
mode:
Diffstat (limited to 'include/net')
-rw-r--r--include/net/act_api.h11
-rw-r--r--include/net/af_rxrpc.h2
-rw-r--r--include/net/bluetooth/hci.h21
-rw-r--r--include/net/bluetooth/hci_core.h8
-rw-r--r--include/net/bond_alb.h4
-rw-r--r--include/net/bonding.h4
-rw-r--r--include/net/cfg80211-wext.h20
-rw-r--r--include/net/cfg80211.h15
-rw-r--r--include/net/cfg802154.h38
-rw-r--r--include/net/dcbnl.h4
-rw-r--r--include/net/devlink.h118
-rw-r--r--include/net/dropreason.h14
-rw-r--r--include/net/dsa.h76
-rw-r--r--include/net/dst.h5
-rw-r--r--include/net/dst_metadata.h1
-rw-r--r--include/net/flow_offload.h8
-rw-r--r--include/net/fq_impl.h16
-rw-r--r--include/net/genetlink.h79
-rw-r--r--include/net/geneve.h2
-rw-r--r--include/net/ieee802154_netdev.h8
-rw-r--r--include/net/inet_frag.h6
-rw-r--r--include/net/ip_vs.h171
-rw-r--r--include/net/ipv6.h33
-rw-r--r--include/net/ipv6_frag.h3
-rw-r--r--include/net/mac80211.h78
-rw-r--r--include/net/mac802154.h31
-rw-r--r--include/net/mana/gdma.h841
-rw-r--r--include/net/mana/hw_channel.h195
-rw-r--r--include/net/mana/mana.h648
-rw-r--r--include/net/mana/mana_auxiliary.h10
-rw-r--r--include/net/mana/shm_channel.h21
-rw-r--r--include/net/mptcp.h12
-rw-r--r--include/net/mrp.h1
-rw-r--r--include/net/net_namespace.h30
-rw-r--r--include/net/netfilter/nf_conntrack_core.h3
-rw-r--r--include/net/netfilter/nf_conntrack_helper.h5
-rw-r--r--include/net/netfilter/nf_nat.h4
-rw-r--r--include/net/netfilter/nf_tables.h15
-rw-r--r--include/net/netfilter/nf_tables_core.h36
-rw-r--r--include/net/netfilter/nf_tables_ipv4.h4
-rw-r--r--include/net/netfilter/nf_tables_ipv6.h6
-rw-r--r--include/net/netfilter/nft_fib.h2
-rw-r--r--include/net/netfilter/nft_meta.h10
-rw-r--r--include/net/netfilter/nft_reject.h3
-rw-r--r--include/net/netlink.h32
-rw-r--r--include/net/netns/ipv4.h8
-rw-r--r--include/net/netns/sctp.h4
-rw-r--r--include/net/netns/xdp.h2
-rw-r--r--include/net/nl802154.h43
-rw-r--r--include/net/rtnetlink.h5
-rw-r--r--include/net/sctp/checksum.h2
-rw-r--r--include/net/sctp/sctp.h11
-rw-r--r--include/net/sctp/stream_sched.h2
-rw-r--r--include/net/sctp/structs.h9
-rw-r--r--include/net/sctp/ulpqueue.h3
-rw-r--r--include/net/sock.h8
-rw-r--r--include/net/sock_reuseport.h2
-rw-r--r--include/net/switchdev.h1
-rw-r--r--include/net/tc_act/tc_ct.h1
-rw-r--r--include/net/tc_act/tc_skbedit.h29
-rw-r--r--include/net/tc_wrapper.h251
-rw-r--r--include/net/tcp.h42
-rw-r--r--include/net/transp_v6.h2
-rw-r--r--include/net/tso.h8
-rw-r--r--include/net/udp.h9
-rw-r--r--include/net/udp_tunnel.h4
-rw-r--r--include/net/xfrm.h149
67 files changed, 2964 insertions, 285 deletions
diff --git a/include/net/act_api.h b/include/net/act_api.h
index 61f2ceb3939e..2a6f443f0ef6 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -67,6 +67,7 @@ struct tc_action {
#define TCA_ACT_FLAGS_BIND (1U << (TCA_ACT_FLAGS_USER_BITS + 1))
#define TCA_ACT_FLAGS_REPLACE (1U << (TCA_ACT_FLAGS_USER_BITS + 2))
#define TCA_ACT_FLAGS_NO_RTNL (1U << (TCA_ACT_FLAGS_USER_BITS + 3))
+#define TCA_ACT_FLAGS_AT_INGRESS (1U << (TCA_ACT_FLAGS_USER_BITS + 4))
/* Update lastuse only if needed, to avoid dirtying a cache line.
* We use a temp variable to avoid fetching jiffies twice.
@@ -100,11 +101,6 @@ static inline enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
return hw_stats;
}
-#ifdef CONFIG_NET_CLS_ACT
-
-#define ACT_P_CREATED 1
-#define ACT_P_DELETED 1
-
typedef void (*tc_action_priv_destructor)(void *priv);
struct tc_action_ops {
@@ -139,6 +135,11 @@ struct tc_action_ops {
struct netlink_ext_ack *extack);
};
+#ifdef CONFIG_NET_CLS_ACT
+
+#define ACT_P_CREATED 1
+#define ACT_P_DELETED 1
+
struct tc_action_net {
struct tcf_idrinfo *idrinfo;
const struct tc_action_ops *ops;
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index b69ca695935c..d5a5ae926380 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -66,10 +66,10 @@ int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t,
void rxrpc_kernel_set_tx_length(struct socket *, struct rxrpc_call *, s64);
bool rxrpc_kernel_check_life(const struct socket *, const struct rxrpc_call *);
u32 rxrpc_kernel_get_epoch(struct socket *, struct rxrpc_call *);
-bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
unsigned long);
int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+int rxrpc_sock_set_security_keyring(struct sock *, struct key *);
#endif /* _NET_RXRPC_H */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 684f1cd28730..8d773b042c85 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -274,6 +274,26 @@ enum {
* during the hdev->setup vendor callback.
*/
HCI_QUIRK_BROKEN_ENHANCED_SETUP_SYNC_CONN,
+
+ /*
+ * When this quirk is set, the HCI_OP_LE_SET_EXT_SCAN_ENABLE command is
+ * disabled. This is required for some Broadcom controllers which
+ * erroneously claim to support extended scanning.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_EXT_SCAN,
+
+ /*
+ * When this quirk is set, the HCI_OP_GET_MWS_TRANSPORT_CONFIG command is
+ * disabled. This is required for some Broadcom controllers which
+ * erroneously claim to support MWS Transport Layer Configuration.
+ *
+ * This quirk can be set before hci_register_dev is called or
+ * during the hdev->setup vendor callback.
+ */
+ HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG,
};
/* HCI device flags */
@@ -2590,6 +2610,7 @@ struct hci_ev_le_conn_complete {
#define LE_EXT_ADV_DIRECT_IND 0x0004
#define LE_EXT_ADV_SCAN_RSP 0x0008
#define LE_EXT_ADV_LEGACY_PDU 0x0010
+#define LE_EXT_ADV_EVT_TYPE_MASK 0x007f
#define ADDR_LE_DEV_PUBLIC 0x00
#define ADDR_LE_DEV_RANDOM 0x01
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index c54bc71254af..7254edfba4c9 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -659,6 +659,7 @@ struct hci_dev {
int (*set_diag)(struct hci_dev *hdev, bool enable);
int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
void (*cmd_timeout)(struct hci_dev *hdev);
+ void (*reset)(struct hci_dev *hdev);
bool (*wakeup)(struct hci_dev *hdev);
int (*set_quality_report)(struct hci_dev *hdev, bool enable);
int (*get_data_path_id)(struct hci_dev *hdev, __u8 *data_path);
@@ -1689,7 +1690,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
/* Use ext scanning if set ext scan param and ext scan enable is supported */
#define use_ext_scan(dev) (((dev)->commands[37] & 0x20) && \
- ((dev)->commands[37] & 0x40))
+ ((dev)->commands[37] & 0x40) && \
+ !test_bit(HCI_QUIRK_BROKEN_EXT_SCAN, &(dev)->quirks))
+
/* Use ext create connection if command is supported */
#define use_ext_conn(dev) ((dev)->commands[37] & 0x80)
@@ -1717,6 +1720,9 @@ void hci_conn_del_sysfs(struct hci_conn *conn);
((dev)->le_features[3] & HCI_LE_CIS_PERIPHERAL)
#define bis_capable(dev) ((dev)->le_features[3] & HCI_LE_ISO_BROADCASTER)
+#define mws_transport_config_capable(dev) (((dev)->commands[30] & 0x08) && \
+ (!test_bit(HCI_QUIRK_BROKEN_MWS_TRANSPORT_CONFIG, &(dev)->quirks)))
+
/* ----- HCI protocols ----- */
#define HCI_PROTO_DEFER 0x01
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index 191c36afa1f4..9dc082b2d543 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -156,8 +156,8 @@ int bond_alb_init_slave(struct bonding *bond, struct slave *slave);
void bond_alb_deinit_slave(struct bonding *bond, struct slave *slave);
void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char link);
void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
-int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
-int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
struct sk_buff *skb);
struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
diff --git a/include/net/bonding.h b/include/net/bonding.h
index e999f851738b..ea36ab7f9e72 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -92,8 +92,6 @@
#define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \
NETIF_F_GSO_ESP)
-#define BOND_TLS_FEATURES (NETIF_F_HW_TLS_TX | NETIF_F_HW_TLS_RX)
-
#ifdef CONFIG_NET_POLL_CONTROLLER
extern atomic_t netpoll_block_tx;
@@ -280,8 +278,6 @@ struct bond_vlan_tag {
unsigned short vlan_id;
};
-bool bond_sk_check(struct bonding *bond);
-
/**
* Returns NULL if the net_device does not belong to any of the bond's slaves
*
diff --git a/include/net/cfg80211-wext.h b/include/net/cfg80211-wext.h
index ad77caf2ffde..0ee36d97e068 100644
--- a/include/net/cfg80211-wext.h
+++ b/include/net/cfg80211-wext.h
@@ -19,34 +19,34 @@
*/
int cfg80211_wext_giwname(struct net_device *dev,
struct iw_request_info *info,
- char *name, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_siwmode(struct net_device *dev, struct iw_request_info *info,
- u32 *mode, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwmode(struct net_device *dev, struct iw_request_info *info,
- u32 *mode, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_siwscan(struct net_device *dev,
struct iw_request_info *info,
union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwscan(struct net_device *dev,
struct iw_request_info *info,
- struct iw_point *data, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwrange(struct net_device *dev,
struct iw_request_info *info,
- struct iw_point *data, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_siwrts(struct net_device *dev,
struct iw_request_info *info,
- struct iw_param *rts, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwrts(struct net_device *dev,
struct iw_request_info *info,
- struct iw_param *rts, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_siwfrag(struct net_device *dev,
struct iw_request_info *info,
- struct iw_param *frag, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwfrag(struct net_device *dev,
struct iw_request_info *info,
- struct iw_param *frag, char *extra);
+ union iwreq_data *wrqu, char *extra);
int cfg80211_wext_giwretry(struct net_device *dev,
struct iw_request_info *info,
- struct iw_param *retry, char *extra);
+ union iwreq_data *wrqu, char *extra);
#endif /* __NET_CFG80211_WEXT_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e09ff87146c1..03d4f4deadae 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2105,6 +2105,7 @@ struct mpath_info {
*
* Used to change BSS parameters (mainly for AP mode).
*
+ * @link_id: link_id or -1 for non-MLD
* @use_cts_prot: Whether to use CTS protection
* (0 = no, 1 = yes, -1 = do not change)
* @use_short_preamble: Whether the use of short preambles is allowed
@@ -2122,6 +2123,7 @@ struct mpath_info {
* @p2p_opp_ps: P2P opportunistic PS (-1 = no change)
*/
struct bss_parameters {
+ int link_id;
int use_cts_prot;
int use_short_preamble;
int use_short_slot_time;
@@ -4740,7 +4742,7 @@ struct ieee80211_iface_limit {
*
* struct ieee80211_iface_limit limits1[] = {
* { .max = 1, .types = BIT(NL80211_IFTYPE_STATION), },
- * { .max = 1, .types = BIT(NL80211_IFTYPE_AP}, },
+ * { .max = 1, .types = BIT(NL80211_IFTYPE_AP), },
* };
* struct ieee80211_iface_combination combination1 = {
* .limits = limits1,
@@ -6933,6 +6935,8 @@ void cfg80211_auth_timeout(struct net_device *dev, const u8 *addr);
* @ap_mld_addr: AP MLD address (in case of MLO)
* @links: per-link information indexed by link ID, use links[0] for
* non-MLO connections
+ * @links.status: Set this (along with a BSS pointer) for links that
+ * were rejected by the AP.
*/
struct cfg80211_rx_assoc_resp {
const u8 *buf;
@@ -6944,6 +6948,7 @@ struct cfg80211_rx_assoc_resp {
struct {
const u8 *addr;
struct cfg80211_bss *bss;
+ u16 status;
} links[IEEE80211_MLD_MAX_NUM_LINKS];
};
@@ -7454,6 +7459,9 @@ struct cfg80211_fils_resp_params {
* if the bss is expired during the connection, esp. for those drivers
* implementing connect op. Only one parameter among @bssid and @bss needs
* to be specified.
+ * @links.status: per-link status code, to report a status code that's not
+ * %WLAN_STATUS_SUCCESS for a given link, it must also be in the
+ * @valid_links bitmap and may have a BSS pointer (which is then released)
*/
struct cfg80211_connect_resp_params {
int status;
@@ -7470,6 +7478,7 @@ struct cfg80211_connect_resp_params {
const u8 *addr;
const u8 *bssid;
struct cfg80211_bss *bss;
+ u16 status;
} links[IEEE80211_MLD_MAX_NUM_LINKS];
};
@@ -7674,6 +7683,8 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
*
* @dev: network device
* @bssid: the BSSID of the AP
+ * @td_bitmap: transition disable policy
+ * @td_bitmap_len: Length of transition disable policy
* @gfp: allocation flags
*
* This function should be called by a driver that supports 4 way handshake
@@ -7684,7 +7695,7 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info,
* indicate the 802.11 association.
*/
void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid,
- gfp_t gfp);
+ const u8* td_bitmap, u8 td_bitmap_len, gfp_t gfp);
/**
* cfg80211_disconnected - notify cfg80211 that connection was dropped
diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h
index d8d8719315fd..d09c393d229f 100644
--- a/include/net/cfg802154.h
+++ b/include/net/cfg802154.h
@@ -11,7 +11,7 @@
#include <linux/ieee802154.h>
#include <linux/netdevice.h>
-#include <linux/mutex.h>
+#include <linux/spinlock.h>
#include <linux/bug.h>
#include <net/nl802154.h>
@@ -166,11 +166,14 @@ wpan_phy_cca_cmp(const struct wpan_phy_cca *a, const struct wpan_phy_cca *b)
* level setting.
* @WPAN_PHY_FLAG_CCA_MODE: Indicates that transceiver will support cca mode
* setting.
+ * @WPAN_PHY_FLAG_STATE_QUEUE_STOPPED: Indicates that the transmit queue was
+ * temporarily stopped.
*/
enum wpan_phy_flags {
WPAN_PHY_FLAG_TXPOWER = BIT(1),
WPAN_PHY_FLAG_CCA_ED_LEVEL = BIT(2),
WPAN_PHY_FLAG_CCA_MODE = BIT(3),
+ WPAN_PHY_FLAG_STATE_QUEUE_STOPPED = BIT(4),
};
struct wpan_phy {
@@ -182,7 +185,7 @@ struct wpan_phy {
*/
const void *privid;
- u32 flags;
+ unsigned long flags;
/*
* This is a PIB according to 802.15.4-2011.
@@ -214,6 +217,17 @@ struct wpan_phy {
/* the network namespace this phy lives in currently */
possible_net_t _net;
+ /* Transmission monitoring and control */
+ spinlock_t queue_lock;
+ atomic_t ongoing_txs;
+ atomic_t hold_txs;
+ wait_queue_head_t sync_txq;
+
+ /* Current filtering level on reception.
+ * Only allowed to be changed if phy is not operational.
+ */
+ enum ieee802154_filtering_level filtering;
+
char priv[] __aligned(NETDEV_ALIGN);
};
@@ -246,6 +260,24 @@ struct ieee802154_addr {
};
};
+/**
+ * struct ieee802154_coord_desc - Coordinator descriptor
+ * @addr: PAN ID and coordinator address
+ * @page: page this coordinator is using
+ * @channel: channel this coordinator is using
+ * @superframe_spec: SuperFrame specification as received
+ * @link_quality: link quality indicator at which the beacon was received
+ * @gts_permit: the coordinator accepts GTS requests
+ */
+struct ieee802154_coord_desc {
+ struct ieee802154_addr addr;
+ u8 page;
+ u8 channel;
+ u16 superframe_spec;
+ u8 link_quality;
+ bool gts_permit;
+};
+
struct ieee802154_llsec_key_id {
u8 mode;
u8 id;
@@ -365,8 +397,6 @@ struct wpan_dev {
bool lbt;
- bool promiscuous_mode;
-
/* fallback for acknowledgment bit setting */
bool ackreq;
};
diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 2b2d86fb3131..8841ab6c2de7 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -109,6 +109,10 @@ struct dcbnl_rtnl_ops {
/* buffer settings */
int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *);
int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *);
+
+ /* apptrust */
+ int (*dcbnl_setapptrust)(struct net_device *, u8 *, int);
+ int (*dcbnl_getapptrust)(struct net_device *, u8 *, int *);
};
#endif /* __NET_DCBNL_H__ */
diff --git a/include/net/devlink.h b/include/net/devlink.h
index ba6b8b094943..6a2e4f21779f 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -114,6 +114,9 @@ struct devlink_rate {
refcount_t refcnt;
};
};
+
+ u32 tx_priority;
+ u32 tx_weight;
};
struct devlink_port {
@@ -121,12 +124,21 @@ struct devlink_port {
struct list_head region_list;
struct devlink *devlink;
unsigned int index;
- spinlock_t type_lock; /* Protects type and type_dev
- * pointer consistency.
+ spinlock_t type_lock; /* Protects type and type_eth/ib
+ * structures consistency.
*/
enum devlink_port_type type;
enum devlink_port_type desired_type;
- void *type_dev;
+ union {
+ struct {
+ struct net_device *netdev;
+ int ifindex;
+ char ifname[IFNAMSIZ];
+ } type_eth;
+ struct {
+ struct ib_device *ibdev;
+ } type_ib;
+ };
struct devlink_port_attrs attrs;
u8 attrs_set:1,
switch_port:1,
@@ -609,6 +621,8 @@ enum devlink_param_generic_id {
#define DEVLINK_INFO_VERSION_GENERIC_FW_ROCE "fw.roce"
/* Firmware bundle identifier */
#define DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID "fw.bundle_id"
+/* Bootloader */
+#define DEVLINK_INFO_VERSION_GENERIC_FW_BOOTLOADER "fw.bootloader"
/**
* struct devlink_flash_update_params - Flash Update parameters
@@ -638,6 +652,10 @@ struct devlink_info_req;
* the data variable must be updated to point to the snapshot data.
* The function will be called while the devlink instance lock is
* held.
+ * @read: callback to directly read a portion of the region. On success,
+ * the data pointer will be updated with the contents of the
+ * requested portion of the region. The function will be called
+ * while the devlink instance lock is held.
* @priv: Pointer to driver private data for the region operation
*/
struct devlink_region_ops {
@@ -647,6 +665,10 @@ struct devlink_region_ops {
const struct devlink_region_ops *ops,
struct netlink_ext_ack *extack,
u8 **data);
+ int (*read)(struct devlink *devlink,
+ const struct devlink_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u64 offset, u32 size, u8 *data);
void *priv;
};
@@ -658,6 +680,10 @@ struct devlink_region_ops {
* the data variable must be updated to point to the snapshot data.
* The function will be called while the devlink instance lock is
* held.
+ * @read: callback to directly read a portion of the region. On success,
+ * the data pointer will be updated with the contents of the
+ * requested portion of the region. The function will be called
+ * while the devlink instance lock is held.
* @priv: Pointer to driver private data for the region operation
*/
struct devlink_port_region_ops {
@@ -667,6 +693,10 @@ struct devlink_port_region_ops {
const struct devlink_port_region_ops *ops,
struct netlink_ext_ack *extack,
u8 **data);
+ int (*read)(struct devlink_port *port,
+ const struct devlink_port_region_ops *ops,
+ struct netlink_ext_ack *extack,
+ u64 offset, u32 size, u8 *data);
void *priv;
};
@@ -885,6 +915,8 @@ enum devlink_trap_generic_id {
DEVLINK_TRAP_GENERIC_ID_ESP_PARSING,
DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_NEXTHOP,
DEVLINK_TRAP_GENERIC_ID_DMAC_FILTER,
+ DEVLINK_TRAP_GENERIC_ID_EAPOL,
+ DEVLINK_TRAP_GENERIC_ID_LOCKED_PORT,
/* Add new generic trap IDs above */
__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -921,6 +953,7 @@ enum devlink_trap_group_generic_id {
DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE,
DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP,
DEVLINK_TRAP_GROUP_GENERIC_ID_PARSER_ERROR_DROPS,
+ DEVLINK_TRAP_GROUP_GENERIC_ID_EAPOL,
/* Add new generic trap group IDs above */
__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -1112,6 +1145,10 @@ enum devlink_trap_group_generic_id {
"blackhole_nexthop"
#define DEVLINK_TRAP_GENERIC_NAME_DMAC_FILTER \
"dmac_filter"
+#define DEVLINK_TRAP_GENERIC_NAME_EAPOL \
+ "eapol"
+#define DEVLINK_TRAP_GENERIC_NAME_LOCKED_PORT \
+ "locked_port"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
"l2_drops"
@@ -1165,6 +1202,8 @@ enum devlink_trap_group_generic_id {
"acl_trap"
#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PARSER_ERROR_DROPS \
"parser_error_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_EAPOL \
+ "eapol"
#define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id, \
_metadata_cap) \
@@ -1415,6 +1454,45 @@ struct devlink_ops {
const u8 *hw_addr, int hw_addr_len,
struct netlink_ext_ack *extack);
/**
+ * @port_fn_roce_get: Port function's roce get function.
+ *
+ * Query RoCE state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_roce_set: Port function's roce set function.
+ *
+ * Enable/Disable the RoCE state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function RoCE handling is not supported.
+ */
+ int (*port_fn_roce_set)(struct devlink_port *devlink_port,
+ bool enable, struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_get: Port function's migratable get function.
+ *
+ * Query migratable state of a function managed by the devlink port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_get)(struct devlink_port *devlink_port,
+ bool *is_enable,
+ struct netlink_ext_ack *extack);
+ /**
+ * @port_fn_migratable_set: Port function's migratable set function.
+ *
+ * Enable/Disable migratable state of a function managed by the devlink
+ * port.
+ * Return -EOPNOTSUPP if port function migratable handling is not
+ * supported.
+ */
+ int (*port_fn_migratable_set)(struct devlink_port *devlink_port,
+ bool enable,
+ struct netlink_ext_ack *extack);
+ /**
* port_new() - Add a new port function of a specified flavor
* @devlink: Devlink instance
* @attrs: attributes of the new port
@@ -1493,10 +1571,18 @@ struct devlink_ops {
u64 tx_share, struct netlink_ext_ack *extack);
int (*rate_leaf_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_priority, struct netlink_ext_ack *extack);
+ int (*rate_leaf_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_weight, struct netlink_ext_ack *extack);
int (*rate_node_tx_share_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_share, struct netlink_ext_ack *extack);
int (*rate_node_tx_max_set)(struct devlink_rate *devlink_rate, void *priv,
u64 tx_max, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_priority_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_priority, struct netlink_ext_ack *extack);
+ int (*rate_node_tx_weight_set)(struct devlink_rate *devlink_rate, void *priv,
+ u32 tx_weight, struct netlink_ext_ack *extack);
int (*rate_node_new)(struct devlink_rate *rate_node, void **priv,
struct netlink_ext_ack *extack);
int (*rate_node_del)(struct devlink_rate *rate_node, void *priv,
@@ -1575,8 +1661,7 @@ int devlink_port_register(struct devlink *devlink,
unsigned int port_index);
void devl_port_unregister(struct devlink_port *devlink_port);
void devlink_port_unregister(struct devlink_port *devlink_port);
-void devlink_port_type_eth_set(struct devlink_port *devlink_port,
- struct net_device *netdev);
+void devlink_port_type_eth_set(struct devlink_port *devlink_port);
void devlink_port_type_ib_set(struct devlink_port *devlink_port,
struct ib_device *ibdev);
void devlink_port_type_clear(struct devlink_port *devlink_port);
@@ -1589,7 +1674,12 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro
void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port,
u32 controller, u16 pf, u32 sf,
bool external);
-int devl_rate_leaf_create(struct devlink_port *port, void *priv);
+struct devlink_rate *
+devl_rate_node_create(struct devlink *devlink, void *priv, char *node_name,
+ struct devlink_rate *parent);
+int
+devl_rate_leaf_create(struct devlink_port *devlink_port, void *priv,
+ struct devlink_rate *parent);
void devl_rate_leaf_destroy(struct devlink_port *devlink_port);
void devl_rate_nodes_destroy(struct devlink *devlink);
void devlink_port_linecard_set(struct devlink_port *devlink_port,
@@ -1713,8 +1803,6 @@ int devlink_region_snapshot_create(struct devlink_region *region,
u8 *data, u32 snapshot_id);
int devlink_info_serial_number_put(struct devlink_info_req *req,
const char *sn);
-int devlink_info_driver_name_put(struct devlink_info_req *req,
- const char *name);
int devlink_info_board_serial_number_put(struct devlink_info_req *req,
const char *bsn);
@@ -1865,6 +1953,9 @@ int devlink_compat_phys_port_name_get(struct net_device *dev,
int devlink_compat_switch_id_get(struct net_device *dev,
struct netdev_phys_item_id *ppid);
+int devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port);
+size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port);
+
#else
static inline struct devlink *devlink_try_get(struct devlink *devlink)
@@ -1901,6 +1992,17 @@ devlink_compat_switch_id_get(struct net_device *dev,
return -EOPNOTSUPP;
}
+static inline int
+devlink_nl_port_handle_fill(struct sk_buff *msg, struct devlink_port *devlink_port)
+{
+ return 0;
+}
+
+static inline size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+{
+ return 0;
+}
+
#endif
#endif /* _NET_DEVLINK_H_ */
diff --git a/include/net/dropreason.h b/include/net/dropreason.h
index c1cbcdbaf149..70539288f995 100644
--- a/include/net/dropreason.h
+++ b/include/net/dropreason.h
@@ -68,6 +68,9 @@
FN(IP_INADDRERRORS) \
FN(IP_INNOROUTES) \
FN(PKT_TOO_BIG) \
+ FN(DUP_FRAG) \
+ FN(FRAG_REASM_TIMEOUT) \
+ FN(FRAG_TOO_FAR) \
FNe(MAX)
/**
@@ -80,6 +83,8 @@ enum skb_drop_reason {
* @SKB_NOT_DROPPED_YET: skb is not dropped yet (used for no-drop case)
*/
SKB_NOT_DROPPED_YET = 0,
+ /** @SKB_CONSUMED: packet has been consumed */
+ SKB_CONSUMED,
/** @SKB_DROP_REASON_NOT_SPECIFIED: drop reason is not specified */
SKB_DROP_REASON_NOT_SPECIFIED,
/** @SKB_DROP_REASON_NO_SOCKET: socket not found */
@@ -298,6 +303,15 @@ enum skb_drop_reason {
* MTU)
*/
SKB_DROP_REASON_PKT_TOO_BIG,
+ /** @SKB_DROP_REASON_DUP_FRAG: duplicate fragment */
+ SKB_DROP_REASON_DUP_FRAG,
+ /** @SKB_DROP_REASON_FRAG_REASM_TIMEOUT: fragment reassembly timeout */
+ SKB_DROP_REASON_FRAG_REASM_TIMEOUT,
+ /**
+ * @SKB_DROP_REASON_FRAG_TOO_FAR: ipv4 fragment too far.
+ * (/proc/sys/net/ipv4/ipfrag_max_dist)
+ */
+ SKB_DROP_REASON_FRAG_TOO_FAR,
/**
* @SKB_DROP_REASON_MAX: the maximum of drop reason, which shouldn't be
* used as a real 'reason'
diff --git a/include/net/dsa.h b/include/net/dsa.h
index ee369670e20e..96086289aa9b 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -22,6 +22,7 @@
#include <net/devlink.h>
#include <net/switchdev.h>
+struct dsa_8021q_context;
struct tc_action;
struct phy_device;
struct fixed_phy_status;
@@ -118,10 +119,6 @@ struct dsa_netdevice_ops {
int cmd);
};
-#define DSA_TAG_DRIVER_ALIAS "dsa_tag-"
-#define MODULE_ALIAS_DSA_TAG_DRIVER(__proto) \
- MODULE_ALIAS(DSA_TAG_DRIVER_ALIAS __stringify(__proto##_VALUE))
-
struct dsa_lag {
struct net_device *dev;
unsigned int id;
@@ -880,9 +877,6 @@ struct dsa_switch_ops {
*/
void (*phylink_get_caps)(struct dsa_switch *ds, int port,
struct phylink_config *config);
- void (*phylink_validate)(struct dsa_switch *ds, int port,
- unsigned long *supported,
- struct phylink_link_state *state);
struct phylink_pcs *(*phylink_mac_select_pcs)(struct dsa_switch *ds,
int port,
phy_interface_t iface);
@@ -1292,8 +1286,6 @@ struct dsa_switch_driver {
const struct dsa_switch_ops *ops;
};
-struct net_device *dsa_dev_to_net_device(struct device *dev);
-
bool dsa_fdb_present_in_other_db(struct dsa_switch *ds, int port,
const unsigned char *addr, u16 vid,
struct dsa_db db);
@@ -1403,70 +1395,4 @@ static inline bool dsa_slave_dev_check(const struct net_device *dev)
netdev_tx_t dsa_enqueue_skb(struct sk_buff *skb, struct net_device *dev);
void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up);
-struct dsa_tag_driver {
- const struct dsa_device_ops *ops;
- struct list_head list;
- struct module *owner;
-};
-
-void dsa_tag_drivers_register(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count,
- struct module *owner);
-void dsa_tag_drivers_unregister(struct dsa_tag_driver *dsa_tag_driver_array[],
- unsigned int count);
-
-#define dsa_tag_driver_module_drivers(__dsa_tag_drivers_array, __count) \
-static int __init dsa_tag_driver_module_init(void) \
-{ \
- dsa_tag_drivers_register(__dsa_tag_drivers_array, __count, \
- THIS_MODULE); \
- return 0; \
-} \
-module_init(dsa_tag_driver_module_init); \
- \
-static void __exit dsa_tag_driver_module_exit(void) \
-{ \
- dsa_tag_drivers_unregister(__dsa_tag_drivers_array, __count); \
-} \
-module_exit(dsa_tag_driver_module_exit)
-
-/**
- * module_dsa_tag_drivers() - Helper macro for registering DSA tag
- * drivers
- * @__ops_array: Array of tag driver structures
- *
- * Helper macro for DSA tag drivers which do not do anything special
- * in module init/exit. Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dsa_tag_drivers(__ops_array) \
-dsa_tag_driver_module_drivers(__ops_array, ARRAY_SIZE(__ops_array))
-
-#define DSA_TAG_DRIVER_NAME(__ops) dsa_tag_driver ## _ ## __ops
-
-/* Create a static structure we can build a linked list of dsa_tag
- * drivers
- */
-#define DSA_TAG_DRIVER(__ops) \
-static struct dsa_tag_driver DSA_TAG_DRIVER_NAME(__ops) = { \
- .ops = &__ops, \
-}
-
-/**
- * module_dsa_tag_driver() - Helper macro for registering a single DSA tag
- * driver
- * @__ops: Single tag driver structures
- *
- * Helper macro for DSA tag drivers which do not do anything special
- * in module init/exit. Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dsa_tag_driver(__ops) \
-DSA_TAG_DRIVER(__ops); \
- \
-static struct dsa_tag_driver *dsa_tag_driver_array[] = { \
- &DSA_TAG_DRIVER_NAME(__ops) \
-}; \
-module_dsa_tag_drivers(dsa_tag_driver_array)
#endif
-
diff --git a/include/net/dst.h b/include/net/dst.h
index 00b479ce6b99..d67fda89cd0f 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -356,9 +356,8 @@ static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev,
struct net *net)
{
- /* TODO : stats should be SMP safe */
- dev->stats.rx_packets++;
- dev->stats.rx_bytes += skb->len;
+ DEV_STATS_INC(dev, rx_packets);
+ DEV_STATS_ADD(dev, rx_bytes, skb->len);
__skb_tunnel_rx(skb, dev, net);
}
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index a454cf4327fe..1b7fae4c6b24 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -26,6 +26,7 @@ struct macsec_info {
struct xfrm_md_info {
u32 if_id;
int link;
+ struct dst_entry *dst_orig;
};
struct metadata_dst {
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index e343f9f8363e..0400a0ac8a29 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -32,6 +32,10 @@ struct flow_match_vlan {
struct flow_dissector_key_vlan *key, *mask;
};
+struct flow_match_arp {
+ struct flow_dissector_key_arp *key, *mask;
+};
+
struct flow_match_ipv4_addrs {
struct flow_dissector_key_ipv4_addrs *key, *mask;
};
@@ -98,6 +102,8 @@ void flow_rule_match_vlan(const struct flow_rule *rule,
struct flow_match_vlan *out);
void flow_rule_match_cvlan(const struct flow_rule *rule,
struct flow_match_vlan *out);
+void flow_rule_match_arp(const struct flow_rule *rule,
+ struct flow_match_arp *out);
void flow_rule_match_ipv4_addrs(const struct flow_rule *rule,
struct flow_match_ipv4_addrs *out);
void flow_rule_match_ipv6_addrs(const struct flow_rule *rule,
@@ -155,6 +161,7 @@ enum flow_action_id {
FLOW_ACTION_MARK,
FLOW_ACTION_PTYPE,
FLOW_ACTION_PRIORITY,
+ FLOW_ACTION_RX_QUEUE_MAPPING,
FLOW_ACTION_WAKE,
FLOW_ACTION_QUEUE,
FLOW_ACTION_SAMPLE,
@@ -247,6 +254,7 @@ struct flow_action_entry {
u32 csum_flags; /* FLOW_ACTION_CSUM */
u32 mark; /* FLOW_ACTION_MARK */
u16 ptype; /* FLOW_ACTION_PTYPE */
+ u16 rx_queue; /* FLOW_ACTION_RX_QUEUE_MAPPING */
u32 priority; /* FLOW_ACTION_PRIORITY */
struct { /* FLOW_ACTION_QUEUE */
u32 ctx;
diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h
index 524b510f1c68..9467e33dfb36 100644
--- a/include/net/fq_impl.h
+++ b/include/net/fq_impl.h
@@ -200,6 +200,7 @@ static void fq_tin_enqueue(struct fq *fq,
fq_skb_free_t free_func)
{
struct fq_flow *flow;
+ struct sk_buff *next;
bool oom;
lockdep_assert_held(&fq->lock);
@@ -214,11 +215,15 @@ static void fq_tin_enqueue(struct fq *fq,
}
flow->tin = tin;
- flow->backlog += skb->len;
- tin->backlog_bytes += skb->len;
- tin->backlog_packets++;
- fq->memory_usage += skb->truesize;
- fq->backlog++;
+ skb_list_walk_safe(skb, skb, next) {
+ skb_mark_not_on_list(skb);
+ flow->backlog += skb->len;
+ tin->backlog_bytes += skb->len;
+ tin->backlog_packets++;
+ fq->memory_usage += skb->truesize;
+ fq->backlog++;
+ __skb_queue_tail(&flow->queue, skb);
+ }
if (list_empty(&flow->flowchain)) {
flow->deficit = fq->quantum;
@@ -226,7 +231,6 @@ static void fq_tin_enqueue(struct fq *fq,
&tin->new_flows);
}
- __skb_queue_tail(&flow->queue, skb);
oom = (fq->memory_usage > fq->memory_limit);
while (fq->backlog > fq->limit || oom) {
flow = fq_find_fattest_flow(fq);
diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index 9f97f73615b6..ed4622dd4828 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -18,12 +18,11 @@ struct genl_multicast_group {
u8 flags;
};
-struct genl_ops;
+struct genl_split_ops;
struct genl_info;
/**
* struct genl_family - generic netlink family
- * @id: protocol family identifier (private)
* @hdrsize: length of user specific header in bytes
* @name: name of family
* @version: protocol version
@@ -43,12 +42,13 @@ struct genl_info;
* @resv_start_op: first operation for which reserved fields of the header
* can be validated and policies are required (see below);
* new families should leave this field at zero
- * @mcgrp_offset: starting number of multicast group IDs in this family
- * (private)
* @ops: the operations supported by this family
* @n_ops: number of operations supported by this family
* @small_ops: the small-struct operations supported by this family
* @n_small_ops: number of small-struct operations supported by this family
+ * @split_ops: the split do/dump form of operation definition
+ * @n_split_ops: number of entries in @split_ops, not that with split do/dump
+ * ops the number of entries is not the same as number of commands
*
* Attribute policies (the combination of @policy and @maxattr fields)
* can be attached at the family level or at the operation level.
@@ -58,29 +58,35 @@ struct genl_info;
* if policy is not provided core will reject all TLV attributes.
*/
struct genl_family {
- int id; /* private */
unsigned int hdrsize;
char name[GENL_NAMSIZ];
unsigned int version;
unsigned int maxattr;
- unsigned int mcgrp_offset; /* private */
u8 netnsok:1;
u8 parallel_ops:1;
u8 n_ops;
u8 n_small_ops;
+ u8 n_split_ops;
u8 n_mcgrps;
u8 resv_start_op;
const struct nla_policy *policy;
- int (*pre_doit)(const struct genl_ops *ops,
+ int (*pre_doit)(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
- void (*post_doit)(const struct genl_ops *ops,
+ void (*post_doit)(const struct genl_split_ops *ops,
struct sk_buff *skb,
struct genl_info *info);
const struct genl_ops * ops;
const struct genl_small_ops *small_ops;
+ const struct genl_split_ops *split_ops;
const struct genl_multicast_group *mcgrps;
struct module *module;
+
+/* private: internal use only */
+ /* protocol family identifier */
+ int id;
+ /* starting number of multicast group IDs in this family */
+ unsigned int mcgrp_offset;
};
/**
@@ -119,6 +125,9 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net)
#define GENL_SET_ERR_MSG(info, msg) NL_SET_ERR_MSG((info)->extack, msg)
+#define GENL_SET_ERR_MSG_FMT(info, msg, args...) \
+ NL_SET_ERR_MSG_FMT((info)->extack, msg, ##args)
+
/* Report that a root attribute is missing */
#define GENL_REQ_ATTR_CHECK(info, attr) ({ \
struct genl_info *__info = (info); \
@@ -182,6 +191,58 @@ struct genl_ops {
};
/**
+ * struct genl_split_ops - generic netlink operations (do/dump split version)
+ * @cmd: command identifier
+ * @internal_flags: flags used by the family
+ * @flags: GENL_* flags (%GENL_ADMIN_PERM or %GENL_UNS_ADMIN_PERM)
+ * @validate: validation flags from enum genl_validate_flags
+ * @policy: netlink policy (takes precedence over family policy)
+ * @maxattr: maximum number of attributes supported
+ *
+ * Do callbacks:
+ * @pre_doit: called before an operation's @doit callback, it may
+ * do additional, common, filtering and return an error
+ * @doit: standard command callback
+ * @post_doit: called after an operation's @doit callback, it may
+ * undo operations done by pre_doit, for example release locks
+ *
+ * Dump callbacks:
+ * @start: start callback for dumps
+ * @dumpit: callback for dumpers
+ * @done: completion callback for dumps
+ *
+ * Do callbacks can be used if %GENL_CMD_CAP_DO is set in @flags.
+ * Dump callbacks can be used if %GENL_CMD_CAP_DUMP is set in @flags.
+ * Exactly one of those flags must be set.
+ */
+struct genl_split_ops {
+ union {
+ struct {
+ int (*pre_doit)(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info);
+ int (*doit)(struct sk_buff *skb,
+ struct genl_info *info);
+ void (*post_doit)(const struct genl_split_ops *ops,
+ struct sk_buff *skb,
+ struct genl_info *info);
+ };
+ struct {
+ int (*start)(struct netlink_callback *cb);
+ int (*dumpit)(struct sk_buff *skb,
+ struct netlink_callback *cb);
+ int (*done)(struct netlink_callback *cb);
+ };
+ };
+ const struct nla_policy *policy;
+ unsigned int maxattr;
+ u8 cmd;
+ u8 internal_flags;
+ u8 flags;
+ u8 validate;
+};
+
+/**
* struct genl_dumpit_info - info that is available during dumpit op call
* @family: generic netlink family - for internal genl code usage
* @op: generic netlink ops - for internal genl code usage
@@ -189,7 +250,7 @@ struct genl_ops {
*/
struct genl_dumpit_info {
const struct genl_family *family;
- struct genl_ops op;
+ struct genl_split_ops op;
struct nlattr **attrs;
};
diff --git a/include/net/geneve.h b/include/net/geneve.h
index bced0b1d9fe4..5c96827a487e 100644
--- a/include/net/geneve.h
+++ b/include/net/geneve.h
@@ -59,7 +59,7 @@ struct genevehdr {
__be16 proto_type;
u8 vni[3];
u8 rsvd2;
- struct geneve_opt options[];
+ u8 options[];
};
static inline bool netif_is_geneve(const struct net_device *dev)
diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h
index 03b64bf876a4..4c33a20ea57f 100644
--- a/include/net/ieee802154_netdev.h
+++ b/include/net/ieee802154_netdev.h
@@ -85,6 +85,14 @@ struct ieee802154_hdr_fc {
#endif
};
+enum ieee802154_frame_version {
+ IEEE802154_2003_STD,
+ IEEE802154_2006_STD,
+ IEEE802154_STD,
+ IEEE802154_RESERVED_STD,
+ IEEE802154_MULTIPURPOSE_STD = IEEE802154_2003_STD,
+};
+
struct ieee802154_hdr {
struct ieee802154_hdr_fc fc;
u8 seq;
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 0b0876610553..b23ddec3cd5c 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -7,6 +7,7 @@
#include <linux/in6.h>
#include <linux/rbtree_types.h>
#include <linux/refcount.h>
+#include <net/dropreason.h>
/* Per netns frag queues directory */
struct fqdir {
@@ -34,12 +35,14 @@ struct fqdir {
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
+ * @INET_FRAG_DROP: if skbs must be dropped (instead of being consumed)
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_HASH_DEAD = BIT(3),
+ INET_FRAG_DROP = BIT(4),
};
struct frag_v4_compare_key {
@@ -139,7 +142,8 @@ void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
/* Free all skbs in the queue; return the sum of their truesizes. */
-unsigned int inet_frag_rbtree_purge(struct rb_root *root);
+unsigned int inet_frag_rbtree_purge(struct rb_root *root,
+ enum skb_drop_reason reason);
static inline void inet_frag_put(struct inet_frag_queue *q)
{
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index ff1804a0c469..c6c61100d244 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -29,6 +29,7 @@
#include <net/netfilter/nf_conntrack.h>
#endif
#include <net/net_namespace.h> /* Netw namespace */
+#include <linux/sched/isolation.h>
#define IP_VS_HDR_INVERSE 1
#define IP_VS_HDR_ICMP 2
@@ -42,6 +43,8 @@ static inline struct netns_ipvs *net_ipvs(struct net* net)
/* Connections' size value needed by ip_vs_ctl.c */
extern int ip_vs_conn_tab_size;
+extern struct mutex __ip_vs_mutex;
+
struct ip_vs_iphdr {
int hdr_flags; /* ipvs flags */
__u32 off; /* Where IP or IPv4 header starts */
@@ -351,11 +354,11 @@ struct ip_vs_seq {
/* counters per cpu */
struct ip_vs_counters {
- __u64 conns; /* connections scheduled */
- __u64 inpkts; /* incoming packets */
- __u64 outpkts; /* outgoing packets */
- __u64 inbytes; /* incoming bytes */
- __u64 outbytes; /* outgoing bytes */
+ u64_stats_t conns; /* connections scheduled */
+ u64_stats_t inpkts; /* incoming packets */
+ u64_stats_t outpkts; /* outgoing packets */
+ u64_stats_t inbytes; /* incoming bytes */
+ u64_stats_t outbytes; /* outgoing bytes */
};
/* Stats per cpu */
struct ip_vs_cpu_stats {
@@ -363,9 +366,12 @@ struct ip_vs_cpu_stats {
struct u64_stats_sync syncp;
};
+/* Default nice for estimator kthreads */
+#define IPVS_EST_NICE 0
+
/* IPVS statistics objects */
struct ip_vs_estimator {
- struct list_head list;
+ struct hlist_node list;
u64 last_inbytes;
u64 last_outbytes;
@@ -378,6 +384,10 @@ struct ip_vs_estimator {
u64 outpps;
u64 inbps;
u64 outbps;
+
+ s32 ktid:16, /* kthread ID, -1=temp list */
+ ktrow:8, /* row/tick ID for kthread */
+ ktcid:8; /* chain ID for kthread tick */
};
/*
@@ -405,6 +415,76 @@ struct ip_vs_stats {
struct ip_vs_kstats kstats0; /* reset values */
};
+struct ip_vs_stats_rcu {
+ struct ip_vs_stats s;
+ struct rcu_head rcu_head;
+};
+
+int ip_vs_stats_init_alloc(struct ip_vs_stats *s);
+struct ip_vs_stats *ip_vs_stats_alloc(void);
+void ip_vs_stats_release(struct ip_vs_stats *stats);
+void ip_vs_stats_free(struct ip_vs_stats *stats);
+
+/* Process estimators in multiple timer ticks (20/50/100, see ktrow) */
+#define IPVS_EST_NTICKS 50
+/* Estimation uses a 2-second period containing ticks (in jiffies) */
+#define IPVS_EST_TICK ((2 * HZ) / IPVS_EST_NTICKS)
+
+/* Limit of CPU load per kthread (8 for 12.5%), ratio of CPU capacity (1/C).
+ * Value of 4 and above ensures kthreads will take work without exceeding
+ * the CPU capacity under different circumstances.
+ */
+#define IPVS_EST_LOAD_DIVISOR 8
+
+/* Kthreads should not have work that exceeds the CPU load above 50% */
+#define IPVS_EST_CPU_KTHREADS (IPVS_EST_LOAD_DIVISOR / 2)
+
+/* Desired number of chains per timer tick (chain load factor in 100us units),
+ * 48=4.8ms of 40ms tick (12% CPU usage):
+ * 2 sec * 1000 ms in sec * 10 (100us in ms) / 8 (12.5%) / 50
+ */
+#define IPVS_EST_CHAIN_FACTOR \
+ ALIGN_DOWN(2 * 1000 * 10 / IPVS_EST_LOAD_DIVISOR / IPVS_EST_NTICKS, 8)
+
+/* Compiled number of chains per tick
+ * The defines should match cond_resched_rcu
+ */
+#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
+#define IPVS_EST_TICK_CHAINS IPVS_EST_CHAIN_FACTOR
+#else
+#define IPVS_EST_TICK_CHAINS 1
+#endif
+
+#if IPVS_EST_NTICKS > 127
+#error Too many timer ticks for ktrow
+#endif
+
+/* Multiple chains processed in same tick */
+struct ip_vs_est_tick_data {
+ struct hlist_head chains[IPVS_EST_TICK_CHAINS];
+ DECLARE_BITMAP(present, IPVS_EST_TICK_CHAINS);
+ DECLARE_BITMAP(full, IPVS_EST_TICK_CHAINS);
+ int chain_len[IPVS_EST_TICK_CHAINS];
+};
+
+/* Context for estimation kthread */
+struct ip_vs_est_kt_data {
+ struct netns_ipvs *ipvs;
+ struct task_struct *task; /* task if running */
+ struct ip_vs_est_tick_data __rcu *ticks[IPVS_EST_NTICKS];
+ DECLARE_BITMAP(avail, IPVS_EST_NTICKS); /* tick has space for ests */
+ unsigned long est_timer; /* estimation timer (jiffies) */
+ struct ip_vs_stats *calc_stats; /* Used for calculation */
+ int tick_len[IPVS_EST_NTICKS]; /* est count */
+ int id; /* ktid per netns */
+ int chain_max; /* max ests per tick chain */
+ int tick_max; /* max ests per tick */
+ int est_count; /* attached ests to kthread */
+ int est_max_count; /* max ests per kthread */
+ int add_row; /* row for new ests */
+ int est_row; /* estimated row */
+};
+
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
@@ -688,6 +768,7 @@ struct ip_vs_dest {
union nf_inet_addr vaddr; /* virtual IP address */
__u32 vfwmark; /* firewall mark of service */
+ struct rcu_head rcu_head;
struct list_head t_list; /* in dest_trash */
unsigned int in_rs_table:1; /* we are in rs_table */
};
@@ -869,7 +950,7 @@ struct netns_ipvs {
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
- struct ip_vs_stats tot_stats; /* Statistics & est. */
+ struct ip_vs_stats_rcu *tot_stats; /* Statistics & est. */
int num_services; /* no of virtual services */
int num_services6; /* IPv6 virtual services */
@@ -932,6 +1013,12 @@ struct netns_ipvs {
int sysctl_schedule_icmp;
int sysctl_ignore_tunneled;
int sysctl_run_estimation;
+#ifdef CONFIG_SYSCTL
+ cpumask_var_t sysctl_est_cpulist; /* kthread cpumask */
+ int est_cpulist_valid; /* cpulist set */
+ int sysctl_est_nice; /* kthread nice */
+ int est_stopped; /* stop tasks */
+#endif
/* ip_vs_lblc */
int sysctl_lblc_expiration;
@@ -942,9 +1029,17 @@ struct netns_ipvs {
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
/* ip_vs_est */
- struct list_head est_list; /* estimator list */
- spinlock_t est_lock;
- struct timer_list est_timer; /* Estimation timer */
+ struct delayed_work est_reload_work;/* Reload kthread tasks */
+ struct mutex est_mutex; /* protect kthread tasks */
+ struct hlist_head est_temp_list; /* Ests during calc phase */
+ struct ip_vs_est_kt_data **est_kt_arr; /* Array of kthread data ptrs */
+ unsigned long est_max_threads;/* Hard limit of kthreads */
+ int est_calc_phase; /* Calculation phase */
+ int est_chain_max; /* Calculated chain_max */
+ int est_kt_count; /* Allocated ptrs */
+ int est_add_ktid; /* ktid where to add ests */
+ atomic_t est_genid; /* kthreads reload genid */
+ atomic_t est_genid_done; /* applied genid */
/* ip_vs_sync */
spinlock_t sync_lock;
struct ipvs_master_sync_state *ms;
@@ -1077,6 +1172,19 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
return ipvs->sysctl_run_estimation;
}
+static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
+{
+ if (ipvs->est_cpulist_valid)
+ return ipvs->sysctl_est_cpulist;
+ else
+ return housekeeping_cpumask(HK_TYPE_KTHREAD);
+}
+
+static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
+{
+ return ipvs->sysctl_est_nice;
+}
+
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
@@ -1174,6 +1282,16 @@ static inline int sysctl_run_estimation(struct netns_ipvs *ipvs)
return 1;
}
+static inline const struct cpumask *sysctl_est_cpulist(struct netns_ipvs *ipvs)
+{
+ return housekeeping_cpumask(HK_TYPE_KTHREAD);
+}
+
+static inline int sysctl_est_nice(struct netns_ipvs *ipvs)
+{
+ return IPVS_EST_NICE;
+}
+
#endif
/* IPVS core functions
@@ -1475,10 +1593,41 @@ int stop_sync_thread(struct netns_ipvs *ipvs, int state);
void ip_vs_sync_conn(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts);
/* IPVS rate estimator prototypes (from ip_vs_est.c) */
-void ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
+int ip_vs_start_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats);
void ip_vs_zero_estimator(struct ip_vs_stats *stats);
void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
+void ip_vs_est_reload_start(struct netns_ipvs *ipvs);
+int ip_vs_est_kthread_start(struct netns_ipvs *ipvs,
+ struct ip_vs_est_kt_data *kd);
+void ip_vs_est_kthread_stop(struct ip_vs_est_kt_data *kd);
+
+static inline void ip_vs_est_stopped_recalc(struct netns_ipvs *ipvs)
+{
+#ifdef CONFIG_SYSCTL
+ /* Stop tasks while cpulist is empty or if disabled with flag */
+ ipvs->est_stopped = !sysctl_run_estimation(ipvs) ||
+ (ipvs->est_cpulist_valid &&
+ cpumask_empty(sysctl_est_cpulist(ipvs)));
+#endif
+}
+
+static inline bool ip_vs_est_stopped(struct netns_ipvs *ipvs)
+{
+#ifdef CONFIG_SYSCTL
+ return ipvs->est_stopped;
+#else
+ return false;
+#endif
+}
+
+static inline int ip_vs_est_max_threads(struct netns_ipvs *ipvs)
+{
+ unsigned int limit = IPVS_EST_CPU_KTHREADS *
+ cpumask_weight(sysctl_est_cpulist(ipvs));
+
+ return max(1U, limit);
+}
/* Various IPVS packet transmitters (from ip_vs_xmit.c) */
int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d383c895592a..03f3af02a9a6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -500,6 +500,39 @@ static inline int ipv6_has_hopopt_jumbo(const struct sk_buff *skb)
return jhdr->nexthdr;
}
+/* Return 0 if HBH header is successfully removed
+ * Or if HBH removal is unnecessary (packet is not big TCP)
+ * Return error to indicate dropping the packet
+ */
+static inline int ipv6_hopopt_jumbo_remove(struct sk_buff *skb)
+{
+ const int hophdr_len = sizeof(struct hop_jumbo_hdr);
+ int nexthdr = ipv6_has_hopopt_jumbo(skb);
+ struct ipv6hdr *h6;
+
+ if (!nexthdr)
+ return 0;
+
+ if (skb_cow_head(skb, 0))
+ return -1;
+
+ /* Remove the HBH header.
+ * Layout: [Ethernet header][IPv6 header][HBH][L4 Header]
+ */
+ memmove(skb_mac_header(skb) + hophdr_len, skb_mac_header(skb),
+ skb_network_header(skb) - skb_mac_header(skb) +
+ sizeof(struct ipv6hdr));
+
+ __skb_pull(skb, hophdr_len);
+ skb->network_header += hophdr_len;
+ skb->mac_header += hophdr_len;
+
+ h6 = ipv6_hdr(skb);
+ h6->nexthdr = nexthdr;
+
+ return 0;
+}
+
static inline bool ipv6_accept_ra(struct inet6_dev *idev)
{
/* If forwarding is enabled, RA are not accepted unless the special
diff --git a/include/net/ipv6_frag.h b/include/net/ipv6_frag.h
index 5052c66e22d2..7321ffe3a108 100644
--- a/include/net/ipv6_frag.h
+++ b/include/net/ipv6_frag.h
@@ -76,6 +76,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
+ fq->q.flags |= INET_FRAG_DROP;
inet_frag_kill(&fq->q);
dev = dev_get_by_index_rcu(net, fq->iif);
@@ -101,7 +102,7 @@ ip6frag_expire_frag_queue(struct net *net, struct frag_queue *fq)
spin_unlock(&fq->q.lock);
icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
- kfree_skb(head);
+ kfree_skb_reason(head, SKB_DROP_REASON_FRAG_REASM_TIMEOUT);
goto out_rcu_unlock;
out:
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ac2bad57933f..689da327ce2e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -89,15 +89,13 @@
/**
* DOC: mac80211 software tx queueing
*
- * mac80211 provides an optional intermediate queueing implementation designed
- * to allow the driver to keep hardware queues short and provide some fairness
- * between different stations/interfaces.
- * In this model, the driver pulls data frames from the mac80211 queue instead
- * of letting mac80211 push them via drv_tx().
- * Other frames (e.g. control or management) are still pushed using drv_tx().
+ * mac80211 uses an intermediate queueing implementation, designed to allow the
+ * driver to keep hardware queues short and to provide some fairness between
+ * different stations/interfaces.
*
- * Drivers indicate that they use this model by implementing the .wake_tx_queue
- * driver operation.
+ * Drivers must provide the .wake_tx_queue driver operation by either
+ * linking it to ieee80211_handle_wake_tx_queue() or implementing a custom
+ * handler.
*
* Intermediate queues (struct ieee80211_txq) are kept per-sta per-tid, with
* another per-sta for non-data/non-mgmt and bufferable management frames, and
@@ -106,9 +104,12 @@
* The driver is expected to initialize its private per-queue data for stations
* and interfaces in the .add_interface and .sta_add ops.
*
- * The driver can't access the queue directly. To dequeue a frame from a
- * txq, it calls ieee80211_tx_dequeue(). Whenever mac80211 adds a new frame to a
- * queue, it calls the .wake_tx_queue driver op.
+ * The driver can't access the internal TX queues (iTXQs) directly.
+ * Whenever mac80211 adds a new frame to a queue, it calls the .wake_tx_queue
+ * driver op.
+ * Drivers implementing a custom .wake_tx_queue op can get them by calling
+ * ieee80211_tx_dequeue(). Drivers using ieee80211_handle_wake_tx_queue() will
+ * simply get the individual frames pushed via the .tx driver operation.
*
* Drivers can optionally delegate responsibility for scheduling queues to
* mac80211, to take advantage of airtime fairness accounting. In this case, to
@@ -1806,6 +1807,10 @@ struct ieee80211_vif_cfg {
* @addr: address of this interface
* @p2p: indicates whether this AP or STA interface is a p2p
* interface, i.e. a GO or p2p-sta respectively
+ * @netdev_features: tx netdev features supported by the hardware for this
+ * vif. mac80211 initializes this to hw->netdev_features, and the driver
+ * can mask out specific tx features. mac80211 will handle software fixup
+ * for masked offloads (GSO, CSUM)
* @driver_flags: flags/capabilities the driver has for this interface,
* these need to be set (or cleared) when the interface is added
* or, if supported by the driver, the interface type is changed
@@ -1826,7 +1831,7 @@ struct ieee80211_vif_cfg {
* for this interface.
* @drv_priv: data area for driver use, will always be aligned to
* sizeof(void \*).
- * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
+ * @txq: the multicast data TX queue
* @txqs_stopped: per AC flag to indicate that intermediate TXQs are stopped,
* protected by fq->lock.
* @offload_flags: 802.3 -> 802.11 enapsulation offload flags, see
@@ -1847,6 +1852,7 @@ struct ieee80211_vif {
struct ieee80211_txq *txq;
+ netdev_features_t netdev_features;
u32 driver_flags;
u32 offload_flags;
@@ -1915,6 +1921,10 @@ static inline bool lockdep_vif_mutex_held(struct ieee80211_vif *vif)
rcu_dereference_protected((vif)->link_conf[link_id], \
lockdep_vif_mutex_held(vif))
+#define link_conf_dereference_check(vif, link_id) \
+ rcu_dereference_check((vif)->link_conf[link_id], \
+ lockdep_vif_mutex_held(vif))
+
/**
* enum ieee80211_key_flags - key flags
*
@@ -2176,6 +2186,7 @@ struct ieee80211_sta_aggregates {
* All link specific info for a STA link for a non MLD STA(single)
* or a MLD STA(multiple entries) are stored here.
*
+ * @sta: reference to owning STA
* @addr: MAC address of the Link STA. For non-MLO STA this is same as the addr
* in ieee80211_sta. For MLO Link STA this addr can be same or different
* from addr in ieee80211_sta (representing MLD STA addr)
@@ -2196,6 +2207,8 @@ struct ieee80211_sta_aggregates {
*
*/
struct ieee80211_link_sta {
+ struct ieee80211_sta *sta;
+
u8 addr[ETH_ALEN];
u8 link_id;
enum ieee80211_smps_mode smps_mode;
@@ -2252,8 +2265,8 @@ struct ieee80211_link_sta {
* For non MLO STA it will point to the deflink data. For MLO STA
* ieee80211_sta_recalc_aggregates() must be called to update it.
* @support_p2p_ps: indicates whether the STA supports P2P PS mechanism or not.
- * @txq: per-TID data TX queues (if driver uses the TXQ abstraction); note that
- * the last entry (%IEEE80211_NUM_TIDS) is used for non-data frames
+ * @txq: per-TID data TX queues; note that the last entry (%IEEE80211_NUM_TIDS)
+ * is used for non-data frames
* @deflink: This holds the default link STA information, for non MLO STA all link
* specific STA information is accessed through @deflink or through
* link[0] which points to address of @deflink. For MLO Link STA
@@ -2308,6 +2321,10 @@ static inline bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta)
rcu_dereference_protected((sta)->link[link_id], \
lockdep_sta_mutex_held(sta))
+#define link_sta_dereference_check(sta, link_id) \
+ rcu_dereference_check((sta)->link[link_id], \
+ lockdep_sta_mutex_held(sta))
+
#define for_each_sta_active_link(vif, sta, link_sta, link_id) \
for (link_id = 0; link_id < ARRAY_SIZE((sta)->link); link_id++) \
if ((!(vif)->active_links || \
@@ -3787,6 +3804,13 @@ struct ieee80211_prep_tx_info {
* should be within a CONFIG_MAC80211_DEBUGFS conditional. This
* callback can sleep.
*
+ * @link_sta_add_debugfs: Drivers can use this callback to add debugfs files
+ * when a link is added to a mac80211 station. This callback
+ * should be within a CPTCFG_MAC80211_DEBUGFS conditional. This
+ * callback can sleep.
+ * For non-MLO the callback will be called once for the deflink with the
+ * station's directory rather than a separate subdirectory.
+ *
* @sta_notify: Notifies low level driver about power state transition of an
* associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating
* in AP mode, this callback will not be called when the flag
@@ -4257,6 +4281,10 @@ struct ieee80211_ops {
struct ieee80211_vif *vif,
struct ieee80211_sta *sta,
struct dentry *dir);
+ void (*link_sta_add_debugfs)(struct ieee80211_hw *hw,
+ struct ieee80211_vif *vif,
+ struct ieee80211_link_sta *link_sta,
+ struct dentry *dir);
#endif
void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
enum sta_notify_cmd, struct ieee80211_sta *sta);
@@ -5691,7 +5719,7 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf);
* @hw: pointer as obtained from ieee80211_alloc_hw().
* @queue: queue number (counted from zero).
*
- * Drivers should use this function instead of netif_wake_queue.
+ * Drivers must use this function instead of netif_wake_queue.
*/
void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue);
@@ -5700,7 +5728,7 @@ void ieee80211_wake_queue(struct ieee80211_hw *hw, int queue);
* @hw: pointer as obtained from ieee80211_alloc_hw().
* @queue: queue number (counted from zero).
*
- * Drivers should use this function instead of netif_stop_queue.
+ * Drivers must use this function instead of netif_stop_queue.
*/
void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue);
@@ -5709,7 +5737,7 @@ void ieee80211_stop_queue(struct ieee80211_hw *hw, int queue);
* @hw: pointer as obtained from ieee80211_alloc_hw().
* @queue: queue number (counted from zero).
*
- * Drivers should use this function instead of netif_stop_queue.
+ * Drivers must use this function instead of netif_queue_stopped.
*
* Return: %true if the queue is stopped. %false otherwise.
*/
@@ -5720,7 +5748,7 @@ int ieee80211_queue_stopped(struct ieee80211_hw *hw, int queue);
* ieee80211_stop_queues - stop all queues
* @hw: pointer as obtained from ieee80211_alloc_hw().
*
- * Drivers should use this function instead of netif_stop_queue.
+ * Drivers must use this function instead of netif_tx_stop_all_queues.
*/
void ieee80211_stop_queues(struct ieee80211_hw *hw);
@@ -5728,7 +5756,7 @@ void ieee80211_stop_queues(struct ieee80211_hw *hw);
* ieee80211_wake_queues - wake all queues
* @hw: pointer as obtained from ieee80211_alloc_hw().
*
- * Drivers should use this function instead of netif_wake_queue.
+ * Drivers must use this function instead of netif_tx_wake_all_queues.
*/
void ieee80211_wake_queues(struct ieee80211_hw *hw);
@@ -6950,6 +6978,18 @@ static inline struct sk_buff *ieee80211_tx_dequeue_ni(struct ieee80211_hw *hw,
}
/**
+ * ieee80211_handle_wake_tx_queue - mac80211 handler for wake_tx_queue callback
+ *
+ * @hw: pointer as obtained from wake_tx_queue() callback().
+ * @txq: pointer as obtained from wake_tx_queue() callback().
+ *
+ * Drivers can use this function for the mandatory mac80211 wake_tx_queue
+ * callback in struct ieee80211_ops. They should not call this function.
+ */
+void ieee80211_handle_wake_tx_queue(struct ieee80211_hw *hw,
+ struct ieee80211_txq *txq);
+
+/**
* ieee80211_next_txq - get next tx queue to pull packets from
*
* @hw: pointer as obtained from ieee80211_alloc_hw()
diff --git a/include/net/mac802154.h b/include/net/mac802154.h
index bdac0ddbdcdb..4a3a9de9da73 100644
--- a/include/net/mac802154.h
+++ b/include/net/mac802154.h
@@ -111,9 +111,6 @@ struct ieee802154_hw {
* promiscuous mode setting.
*
* @IEEE802154_HW_RX_OMIT_CKSUM: Indicates that receiver omits FCS.
- *
- * @IEEE802154_HW_RX_DROP_BAD_CKSUM: Indicates that receiver will not filter
- * frames with bad checksum.
*/
enum ieee802154_hw_flags {
IEEE802154_HW_TX_OMIT_CKSUM = BIT(0),
@@ -123,7 +120,6 @@ enum ieee802154_hw_flags {
IEEE802154_HW_AFILT = BIT(4),
IEEE802154_HW_PROMISCUOUS = BIT(5),
IEEE802154_HW_RX_OMIT_CKSUM = BIT(6),
- IEEE802154_HW_RX_DROP_BAD_CKSUM = BIT(7),
};
/* Indicates that receiver omits FCS and xmitter will add FCS on it's own. */
@@ -460,33 +456,6 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw);
*/
void ieee802154_rx_irqsafe(struct ieee802154_hw *hw, struct sk_buff *skb,
u8 lqi);
-/**
- * ieee802154_wake_queue - wake ieee802154 queue
- * @hw: pointer as obtained from ieee802154_alloc_hw().
- *
- * Tranceivers usually have either one transmit framebuffer or one framebuffer
- * for both transmitting and receiving. Hence, the core currently only handles
- * one frame at a time for each phy, which means we had to stop the queue to
- * avoid new skb to come during the transmission. The queue then needs to be
- * woken up after the operation.
- *
- * Drivers should use this function instead of netif_wake_queue.
- */
-void ieee802154_wake_queue(struct ieee802154_hw *hw);
-
-/**
- * ieee802154_stop_queue - stop ieee802154 queue
- * @hw: pointer as obtained from ieee802154_alloc_hw().
- *
- * Tranceivers usually have either one transmit framebuffer or one framebuffer
- * for both transmitting and receiving. Hence, the core currently only handles
- * one frame at a time for each phy, which means we need to tell upper layers to
- * stop giving us new skbs while we are busy with the transmitted one. The queue
- * must then be stopped before transmitting.
- *
- * Drivers should use this function instead of netif_stop_queue.
- */
-void ieee802154_stop_queue(struct ieee802154_hw *hw);
/**
* ieee802154_xmit_complete - frame transmission complete
diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h
new file mode 100644
index 000000000000..d80c78506f19
--- /dev/null
+++ b/include/net/mana/gdma.h
@@ -0,0 +1,841 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _GDMA_H
+#define _GDMA_H
+
+#include <linux/dma-mapping.h>
+#include <linux/netdevice.h>
+
+#include "shm_channel.h"
+
+#define GDMA_STATUS_MORE_ENTRIES 0x00000105
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+enum gdma_request_type {
+ GDMA_VERIFY_VF_DRIVER_VERSION = 1,
+ GDMA_QUERY_MAX_RESOURCES = 2,
+ GDMA_LIST_DEVICES = 3,
+ GDMA_REGISTER_DEVICE = 4,
+ GDMA_DEREGISTER_DEVICE = 5,
+ GDMA_GENERATE_TEST_EQE = 10,
+ GDMA_CREATE_QUEUE = 12,
+ GDMA_DISABLE_QUEUE = 13,
+ GDMA_ALLOCATE_RESOURCE_RANGE = 22,
+ GDMA_DESTROY_RESOURCE_RANGE = 24,
+ GDMA_CREATE_DMA_REGION = 25,
+ GDMA_DMA_REGION_ADD_PAGES = 26,
+ GDMA_DESTROY_DMA_REGION = 27,
+ GDMA_CREATE_PD = 29,
+ GDMA_DESTROY_PD = 30,
+ GDMA_CREATE_MR = 31,
+ GDMA_DESTROY_MR = 32,
+};
+
+#define GDMA_RESOURCE_DOORBELL_PAGE 27
+
+enum gdma_queue_type {
+ GDMA_INVALID_QUEUE,
+ GDMA_SQ,
+ GDMA_RQ,
+ GDMA_CQ,
+ GDMA_EQ,
+};
+
+enum gdma_work_request_flags {
+ GDMA_WR_NONE = 0,
+ GDMA_WR_OOB_IN_SGL = BIT(0),
+ GDMA_WR_PAD_BY_SGE0 = BIT(1),
+};
+
+enum gdma_eqe_type {
+ GDMA_EQE_COMPLETION = 3,
+ GDMA_EQE_TEST_EVENT = 64,
+ GDMA_EQE_HWC_INIT_EQ_ID_DB = 129,
+ GDMA_EQE_HWC_INIT_DATA = 130,
+ GDMA_EQE_HWC_INIT_DONE = 131,
+};
+
+enum {
+ GDMA_DEVICE_NONE = 0,
+ GDMA_DEVICE_HWC = 1,
+ GDMA_DEVICE_MANA = 2,
+};
+
+typedef u64 gdma_obj_handle_t;
+
+struct gdma_resource {
+ /* Protect the bitmap */
+ spinlock_t lock;
+
+ /* The bitmap size in bits. */
+ u32 size;
+
+ /* The bitmap tracks the resources. */
+ unsigned long *map;
+};
+
+union gdma_doorbell_entry {
+ u64 as_uint64;
+
+ struct {
+ u64 id : 24;
+ u64 reserved : 8;
+ u64 tail_ptr : 31;
+ u64 arm : 1;
+ } cq;
+
+ struct {
+ u64 id : 24;
+ u64 wqe_cnt : 8;
+ u64 tail_ptr : 32;
+ } rq;
+
+ struct {
+ u64 id : 24;
+ u64 reserved : 8;
+ u64 tail_ptr : 32;
+ } sq;
+
+ struct {
+ u64 id : 16;
+ u64 reserved : 16;
+ u64 tail_ptr : 31;
+ u64 arm : 1;
+ } eq;
+}; /* HW DATA */
+
+struct gdma_msg_hdr {
+ u32 hdr_type;
+ u32 msg_type;
+ u16 msg_version;
+ u16 hwc_msg_id;
+ u32 msg_size;
+}; /* HW DATA */
+
+struct gdma_dev_id {
+ union {
+ struct {
+ u16 type;
+ u16 instance;
+ };
+
+ u32 as_uint32;
+ };
+}; /* HW DATA */
+
+struct gdma_req_hdr {
+ struct gdma_msg_hdr req;
+ struct gdma_msg_hdr resp; /* The expected response */
+ struct gdma_dev_id dev_id;
+ u32 activity_id;
+}; /* HW DATA */
+
+struct gdma_resp_hdr {
+ struct gdma_msg_hdr response;
+ struct gdma_dev_id dev_id;
+ u32 activity_id;
+ u32 status;
+ u32 reserved;
+}; /* HW DATA */
+
+struct gdma_general_req {
+ struct gdma_req_hdr hdr;
+}; /* HW DATA */
+
+#define GDMA_MESSAGE_V1 1
+
+struct gdma_general_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+#define GDMA_STANDARD_HEADER_TYPE 0
+
+static inline void mana_gd_init_req_hdr(struct gdma_req_hdr *hdr, u32 code,
+ u32 req_size, u32 resp_size)
+{
+ hdr->req.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+ hdr->req.msg_type = code;
+ hdr->req.msg_version = GDMA_MESSAGE_V1;
+ hdr->req.msg_size = req_size;
+
+ hdr->resp.hdr_type = GDMA_STANDARD_HEADER_TYPE;
+ hdr->resp.msg_type = code;
+ hdr->resp.msg_version = GDMA_MESSAGE_V1;
+ hdr->resp.msg_size = resp_size;
+}
+
+/* The 16-byte struct is part of the GDMA work queue entry (WQE). */
+struct gdma_sge {
+ u64 address;
+ u32 mem_key;
+ u32 size;
+}; /* HW DATA */
+
+struct gdma_wqe_request {
+ struct gdma_sge *sgl;
+ u32 num_sge;
+
+ u32 inline_oob_size;
+ const void *inline_oob_data;
+
+ u32 flags;
+ u32 client_data_unit;
+};
+
+enum gdma_page_type {
+ GDMA_PAGE_TYPE_4K,
+};
+
+#define GDMA_INVALID_DMA_REGION 0
+
+struct gdma_mem_info {
+ struct device *dev;
+
+ dma_addr_t dma_handle;
+ void *virt_addr;
+ u64 length;
+
+ /* Allocated by the PF driver */
+ gdma_obj_handle_t dma_region_handle;
+};
+
+#define REGISTER_ATB_MST_MKEY_LOWER_SIZE 8
+
+struct gdma_dev {
+ struct gdma_context *gdma_context;
+
+ struct gdma_dev_id dev_id;
+
+ u32 pdid;
+ u32 doorbell;
+ u32 gpa_mkey;
+
+ /* GDMA driver specific pointer */
+ void *driver_data;
+
+ struct auxiliary_device *adev;
+};
+
+#define MINIMUM_SUPPORTED_PAGE_SIZE PAGE_SIZE
+
+#define GDMA_CQE_SIZE 64
+#define GDMA_EQE_SIZE 16
+#define GDMA_MAX_SQE_SIZE 512
+#define GDMA_MAX_RQE_SIZE 256
+
+#define GDMA_COMP_DATA_SIZE 0x3C
+
+#define GDMA_EVENT_DATA_SIZE 0xC
+
+/* The WQE size must be a multiple of the Basic Unit, which is 32 bytes. */
+#define GDMA_WQE_BU_SIZE 32
+
+#define INVALID_PDID UINT_MAX
+#define INVALID_DOORBELL UINT_MAX
+#define INVALID_MEM_KEY UINT_MAX
+#define INVALID_QUEUE_ID UINT_MAX
+#define INVALID_PCI_MSIX_INDEX UINT_MAX
+
+struct gdma_comp {
+ u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
+ u32 wq_num;
+ bool is_sq;
+};
+
+struct gdma_event {
+ u32 details[GDMA_EVENT_DATA_SIZE / 4];
+ u8 type;
+};
+
+struct gdma_queue;
+
+struct mana_eq {
+ struct gdma_queue *eq;
+};
+
+typedef void gdma_eq_callback(void *context, struct gdma_queue *q,
+ struct gdma_event *e);
+
+typedef void gdma_cq_callback(void *context, struct gdma_queue *q);
+
+/* The 'head' is the producer index. For SQ/RQ, when the driver posts a WQE
+ * (Note: the WQE size must be a multiple of the 32-byte Basic Unit), the
+ * driver increases the 'head' in BUs rather than in bytes, and notifies
+ * the HW of the updated head. For EQ/CQ, the driver uses the 'head' to track
+ * the HW head, and increases the 'head' by 1 for every processed EQE/CQE.
+ *
+ * The 'tail' is the consumer index for SQ/RQ. After the CQE of the SQ/RQ is
+ * processed, the driver increases the 'tail' to indicate that WQEs have
+ * been consumed by the HW, so the driver can post new WQEs into the SQ/RQ.
+ *
+ * The driver doesn't use the 'tail' for EQ/CQ, because the driver ensures
+ * that the EQ/CQ is big enough so they can't overflow, and the driver uses
+ * the owner bits mechanism to detect if the queue has become empty.
+ */
+struct gdma_queue {
+ struct gdma_dev *gdma_dev;
+
+ enum gdma_queue_type type;
+ u32 id;
+
+ struct gdma_mem_info mem_info;
+
+ void *queue_mem_ptr;
+ u32 queue_size;
+
+ bool monitor_avl_buf;
+
+ u32 head;
+ u32 tail;
+
+ /* Extra fields specific to EQ/CQ. */
+ union {
+ struct {
+ bool disable_needed;
+
+ gdma_eq_callback *callback;
+ void *context;
+
+ unsigned int msix_index;
+
+ u32 log2_throttle_limit;
+ } eq;
+
+ struct {
+ gdma_cq_callback *callback;
+ void *context;
+
+ struct gdma_queue *parent; /* For CQ/EQ relationship */
+ } cq;
+ };
+};
+
+struct gdma_queue_spec {
+ enum gdma_queue_type type;
+ bool monitor_avl_buf;
+ unsigned int queue_size;
+
+ /* Extra fields specific to EQ/CQ. */
+ union {
+ struct {
+ gdma_eq_callback *callback;
+ void *context;
+
+ unsigned long log2_throttle_limit;
+ } eq;
+
+ struct {
+ gdma_cq_callback *callback;
+ void *context;
+
+ struct gdma_queue *parent_eq;
+
+ } cq;
+ };
+};
+
+struct gdma_irq_context {
+ void (*handler)(void *arg);
+ void *arg;
+};
+
+struct gdma_context {
+ struct device *dev;
+
+ /* Per-vPort max number of queues */
+ unsigned int max_num_queues;
+ unsigned int max_num_msix;
+ unsigned int num_msix_usable;
+ struct gdma_resource msix_resource;
+ struct gdma_irq_context *irq_contexts;
+
+ /* This maps a CQ index to the queue structure. */
+ unsigned int max_num_cqs;
+ struct gdma_queue **cq_table;
+
+ /* Protect eq_test_event and test_event_eq_id */
+ struct mutex eq_test_event_mutex;
+ struct completion eq_test_event;
+ u32 test_event_eq_id;
+
+ bool is_pf;
+ phys_addr_t bar0_pa;
+ void __iomem *bar0_va;
+ void __iomem *shm_base;
+ void __iomem *db_page_base;
+ phys_addr_t phys_db_page_base;
+ u32 db_page_size;
+ int numa_node;
+
+ /* Shared memory chanenl (used to bootstrap HWC) */
+ struct shm_channel shm_channel;
+
+ /* Hardware communication channel (HWC) */
+ struct gdma_dev hwc;
+
+ /* Azure network adapter */
+ struct gdma_dev mana;
+};
+
+#define MAX_NUM_GDMA_DEVICES 4
+
+static inline bool mana_gd_is_mana(struct gdma_dev *gd)
+{
+ return gd->dev_id.type == GDMA_DEVICE_MANA;
+}
+
+static inline bool mana_gd_is_hwc(struct gdma_dev *gd)
+{
+ return gd->dev_id.type == GDMA_DEVICE_HWC;
+}
+
+u8 *mana_gd_get_wqe_ptr(const struct gdma_queue *wq, u32 wqe_offset);
+u32 mana_gd_wq_avail_space(struct gdma_queue *wq);
+
+int mana_gd_test_eq(struct gdma_context *gc, struct gdma_queue *eq);
+
+int mana_gd_create_hwc_queue(struct gdma_dev *gd,
+ const struct gdma_queue_spec *spec,
+ struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_eq(struct gdma_dev *gd,
+ const struct gdma_queue_spec *spec,
+ struct gdma_queue **queue_ptr);
+
+int mana_gd_create_mana_wq_cq(struct gdma_dev *gd,
+ const struct gdma_queue_spec *spec,
+ struct gdma_queue **queue_ptr);
+
+void mana_gd_destroy_queue(struct gdma_context *gc, struct gdma_queue *queue);
+
+int mana_gd_poll_cq(struct gdma_queue *cq, struct gdma_comp *comp, int num_cqe);
+
+void mana_gd_ring_cq(struct gdma_queue *cq, u8 arm_bit);
+
+struct gdma_wqe {
+ u32 reserved :24;
+ u32 last_vbytes :8;
+
+ union {
+ u32 flags;
+
+ struct {
+ u32 num_sge :8;
+ u32 inline_oob_size_div4:3;
+ u32 client_oob_in_sgl :1;
+ u32 reserved1 :4;
+ u32 client_data_unit :14;
+ u32 reserved2 :2;
+ };
+ };
+}; /* HW DATA */
+
+#define INLINE_OOB_SMALL_SIZE 8
+#define INLINE_OOB_LARGE_SIZE 24
+
+#define MAX_TX_WQE_SIZE 512
+#define MAX_RX_WQE_SIZE 256
+
+#define MAX_TX_WQE_SGL_ENTRIES ((GDMA_MAX_SQE_SIZE - \
+ sizeof(struct gdma_sge) - INLINE_OOB_SMALL_SIZE) / \
+ sizeof(struct gdma_sge))
+
+#define MAX_RX_WQE_SGL_ENTRIES ((GDMA_MAX_RQE_SIZE - \
+ sizeof(struct gdma_sge)) / sizeof(struct gdma_sge))
+
+struct gdma_cqe {
+ u32 cqe_data[GDMA_COMP_DATA_SIZE / 4];
+
+ union {
+ u32 as_uint32;
+
+ struct {
+ u32 wq_num : 24;
+ u32 is_sq : 1;
+ u32 reserved : 4;
+ u32 owner_bits : 3;
+ };
+ } cqe_info;
+}; /* HW DATA */
+
+#define GDMA_CQE_OWNER_BITS 3
+
+#define GDMA_CQE_OWNER_MASK ((1 << GDMA_CQE_OWNER_BITS) - 1)
+
+#define SET_ARM_BIT 1
+
+#define GDMA_EQE_OWNER_BITS 3
+
+union gdma_eqe_info {
+ u32 as_uint32;
+
+ struct {
+ u32 type : 8;
+ u32 reserved1 : 8;
+ u32 client_id : 2;
+ u32 reserved2 : 11;
+ u32 owner_bits : 3;
+ };
+}; /* HW DATA */
+
+#define GDMA_EQE_OWNER_MASK ((1 << GDMA_EQE_OWNER_BITS) - 1)
+#define INITIALIZED_OWNER_BIT(log2_num_entries) (1UL << (log2_num_entries))
+
+struct gdma_eqe {
+ u32 details[GDMA_EVENT_DATA_SIZE / 4];
+ u32 eqe_info;
+}; /* HW DATA */
+
+#define GDMA_REG_DB_PAGE_OFFSET 8
+#define GDMA_REG_DB_PAGE_SIZE 0x10
+#define GDMA_REG_SHM_OFFSET 0x18
+
+#define GDMA_PF_REG_DB_PAGE_SIZE 0xD0
+#define GDMA_PF_REG_DB_PAGE_OFF 0xC8
+#define GDMA_PF_REG_SHM_OFF 0x70
+
+#define GDMA_SRIOV_REG_CFG_BASE_OFF 0x108
+
+#define MANA_PF_DEVICE_ID 0x00B9
+#define MANA_VF_DEVICE_ID 0x00BA
+
+struct gdma_posted_wqe_info {
+ u32 wqe_size_in_bu;
+};
+
+/* GDMA_GENERATE_TEST_EQE */
+struct gdma_generate_test_event_req {
+ struct gdma_req_hdr hdr;
+ u32 queue_index;
+}; /* HW DATA */
+
+/* GDMA_VERIFY_VF_DRIVER_VERSION */
+enum {
+ GDMA_PROTOCOL_V1 = 1,
+ GDMA_PROTOCOL_FIRST = GDMA_PROTOCOL_V1,
+ GDMA_PROTOCOL_LAST = GDMA_PROTOCOL_V1,
+};
+
+#define GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT BIT(0)
+
+/* Advertise to the NIC firmware: the NAPI work_done variable race is fixed,
+ * so the driver is able to reliably support features like busy_poll.
+ */
+#define GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX BIT(2)
+
+#define GDMA_DRV_CAP_FLAGS1 \
+ (GDMA_DRV_CAP_FLAG_1_EQ_SHARING_MULTI_VPORT | \
+ GDMA_DRV_CAP_FLAG_1_NAPI_WKDONE_FIX)
+
+#define GDMA_DRV_CAP_FLAGS2 0
+
+#define GDMA_DRV_CAP_FLAGS3 0
+
+#define GDMA_DRV_CAP_FLAGS4 0
+
+struct gdma_verify_ver_req {
+ struct gdma_req_hdr hdr;
+
+ /* Mandatory fields required for protocol establishment */
+ u64 protocol_ver_min;
+ u64 protocol_ver_max;
+
+ /* Gdma Driver Capability Flags */
+ u64 gd_drv_cap_flags1;
+ u64 gd_drv_cap_flags2;
+ u64 gd_drv_cap_flags3;
+ u64 gd_drv_cap_flags4;
+
+ /* Advisory fields */
+ u64 drv_ver;
+ u32 os_type; /* Linux = 0x10; Windows = 0x20; Other = 0x30 */
+ u32 reserved;
+ u32 os_ver_major;
+ u32 os_ver_minor;
+ u32 os_ver_build;
+ u32 os_ver_platform;
+ u64 reserved_2;
+ u8 os_ver_str1[128];
+ u8 os_ver_str2[128];
+ u8 os_ver_str3[128];
+ u8 os_ver_str4[128];
+}; /* HW DATA */
+
+struct gdma_verify_ver_resp {
+ struct gdma_resp_hdr hdr;
+ u64 gdma_protocol_ver;
+ u64 pf_cap_flags1;
+ u64 pf_cap_flags2;
+ u64 pf_cap_flags3;
+ u64 pf_cap_flags4;
+}; /* HW DATA */
+
+/* GDMA_QUERY_MAX_RESOURCES */
+struct gdma_query_max_resources_resp {
+ struct gdma_resp_hdr hdr;
+ u32 status;
+ u32 max_sq;
+ u32 max_rq;
+ u32 max_cq;
+ u32 max_eq;
+ u32 max_db;
+ u32 max_mst;
+ u32 max_cq_mod_ctx;
+ u32 max_mod_cq;
+ u32 max_msix;
+}; /* HW DATA */
+
+/* GDMA_LIST_DEVICES */
+struct gdma_list_devices_resp {
+ struct gdma_resp_hdr hdr;
+ u32 num_of_devs;
+ u32 reserved;
+ struct gdma_dev_id devs[64];
+}; /* HW DATA */
+
+/* GDMA_REGISTER_DEVICE */
+struct gdma_register_device_resp {
+ struct gdma_resp_hdr hdr;
+ u32 pdid;
+ u32 gpa_mkey;
+ u32 db_id;
+}; /* HW DATA */
+
+struct gdma_allocate_resource_range_req {
+ struct gdma_req_hdr hdr;
+ u32 resource_type;
+ u32 num_resources;
+ u32 alignment;
+ u32 allocated_resources;
+};
+
+struct gdma_allocate_resource_range_resp {
+ struct gdma_resp_hdr hdr;
+ u32 allocated_resources;
+};
+
+struct gdma_destroy_resource_range_req {
+ struct gdma_req_hdr hdr;
+ u32 resource_type;
+ u32 num_resources;
+ u32 allocated_resources;
+};
+
+/* GDMA_CREATE_QUEUE */
+struct gdma_create_queue_req {
+ struct gdma_req_hdr hdr;
+ u32 type;
+ u32 reserved1;
+ u32 pdid;
+ u32 doolbell_id;
+ gdma_obj_handle_t gdma_region;
+ u32 reserved2;
+ u32 queue_size;
+ u32 log2_throttle_limit;
+ u32 eq_pci_msix_index;
+ u32 cq_mod_ctx_id;
+ u32 cq_parent_eq_id;
+ u8 rq_drop_on_overrun;
+ u8 rq_err_on_wqe_overflow;
+ u8 rq_chain_rec_wqes;
+ u8 sq_hw_db;
+ u32 reserved3;
+}; /* HW DATA */
+
+struct gdma_create_queue_resp {
+ struct gdma_resp_hdr hdr;
+ u32 queue_index;
+}; /* HW DATA */
+
+/* GDMA_DISABLE_QUEUE */
+struct gdma_disable_queue_req {
+ struct gdma_req_hdr hdr;
+ u32 type;
+ u32 queue_index;
+ u32 alloc_res_id_on_creation;
+}; /* HW DATA */
+
+enum atb_page_size {
+ ATB_PAGE_SIZE_4K,
+ ATB_PAGE_SIZE_8K,
+ ATB_PAGE_SIZE_16K,
+ ATB_PAGE_SIZE_32K,
+ ATB_PAGE_SIZE_64K,
+ ATB_PAGE_SIZE_128K,
+ ATB_PAGE_SIZE_256K,
+ ATB_PAGE_SIZE_512K,
+ ATB_PAGE_SIZE_1M,
+ ATB_PAGE_SIZE_2M,
+ ATB_PAGE_SIZE_MAX,
+};
+
+enum gdma_mr_access_flags {
+ GDMA_ACCESS_FLAG_LOCAL_READ = BIT_ULL(0),
+ GDMA_ACCESS_FLAG_LOCAL_WRITE = BIT_ULL(1),
+ GDMA_ACCESS_FLAG_REMOTE_READ = BIT_ULL(2),
+ GDMA_ACCESS_FLAG_REMOTE_WRITE = BIT_ULL(3),
+ GDMA_ACCESS_FLAG_REMOTE_ATOMIC = BIT_ULL(4),
+};
+
+/* GDMA_CREATE_DMA_REGION */
+struct gdma_create_dma_region_req {
+ struct gdma_req_hdr hdr;
+
+ /* The total size of the DMA region */
+ u64 length;
+
+ /* The offset in the first page */
+ u32 offset_in_page;
+
+ /* enum gdma_page_type */
+ u32 gdma_page_type;
+
+ /* The total number of pages */
+ u32 page_count;
+
+ /* If page_addr_list_len is smaller than page_count,
+ * the remaining page addresses will be added via the
+ * message GDMA_DMA_REGION_ADD_PAGES.
+ */
+ u32 page_addr_list_len;
+ u64 page_addr_list[];
+}; /* HW DATA */
+
+struct gdma_create_dma_region_resp {
+ struct gdma_resp_hdr hdr;
+ gdma_obj_handle_t dma_region_handle;
+}; /* HW DATA */
+
+/* GDMA_DMA_REGION_ADD_PAGES */
+struct gdma_dma_region_add_pages_req {
+ struct gdma_req_hdr hdr;
+
+ gdma_obj_handle_t dma_region_handle;
+
+ u32 page_addr_list_len;
+ u32 reserved3;
+
+ u64 page_addr_list[];
+}; /* HW DATA */
+
+/* GDMA_DESTROY_DMA_REGION */
+struct gdma_destroy_dma_region_req {
+ struct gdma_req_hdr hdr;
+
+ gdma_obj_handle_t dma_region_handle;
+}; /* HW DATA */
+
+enum gdma_pd_flags {
+ GDMA_PD_FLAG_INVALID = 0,
+};
+
+struct gdma_create_pd_req {
+ struct gdma_req_hdr hdr;
+ enum gdma_pd_flags flags;
+ u32 reserved;
+};/* HW DATA */
+
+struct gdma_create_pd_resp {
+ struct gdma_resp_hdr hdr;
+ gdma_obj_handle_t pd_handle;
+ u32 pd_id;
+ u32 reserved;
+};/* HW DATA */
+
+struct gdma_destroy_pd_req {
+ struct gdma_req_hdr hdr;
+ gdma_obj_handle_t pd_handle;
+};/* HW DATA */
+
+struct gdma_destory_pd_resp {
+ struct gdma_resp_hdr hdr;
+};/* HW DATA */
+
+enum gdma_mr_type {
+ /* Guest Virtual Address - MRs of this type allow access
+ * to memory mapped by PTEs associated with this MR using a virtual
+ * address that is set up in the MST
+ */
+ GDMA_MR_TYPE_GVA = 2,
+};
+
+struct gdma_create_mr_params {
+ gdma_obj_handle_t pd_handle;
+ enum gdma_mr_type mr_type;
+ union {
+ struct {
+ gdma_obj_handle_t dma_region_handle;
+ u64 virtual_address;
+ enum gdma_mr_access_flags access_flags;
+ } gva;
+ };
+};
+
+struct gdma_create_mr_request {
+ struct gdma_req_hdr hdr;
+ gdma_obj_handle_t pd_handle;
+ enum gdma_mr_type mr_type;
+ u32 reserved_1;
+
+ union {
+ struct {
+ gdma_obj_handle_t dma_region_handle;
+ u64 virtual_address;
+ enum gdma_mr_access_flags access_flags;
+ } gva;
+
+ };
+ u32 reserved_2;
+};/* HW DATA */
+
+struct gdma_create_mr_response {
+ struct gdma_resp_hdr hdr;
+ gdma_obj_handle_t mr_handle;
+ u32 lkey;
+ u32 rkey;
+};/* HW DATA */
+
+struct gdma_destroy_mr_request {
+ struct gdma_req_hdr hdr;
+ gdma_obj_handle_t mr_handle;
+};/* HW DATA */
+
+struct gdma_destroy_mr_response {
+ struct gdma_resp_hdr hdr;
+};/* HW DATA */
+
+int mana_gd_verify_vf_version(struct pci_dev *pdev);
+
+int mana_gd_register_device(struct gdma_dev *gd);
+int mana_gd_deregister_device(struct gdma_dev *gd);
+
+int mana_gd_post_work_request(struct gdma_queue *wq,
+ const struct gdma_wqe_request *wqe_req,
+ struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_post_and_ring(struct gdma_queue *queue,
+ const struct gdma_wqe_request *wqe,
+ struct gdma_posted_wqe_info *wqe_info);
+
+int mana_gd_alloc_res_map(u32 res_avail, struct gdma_resource *r);
+void mana_gd_free_res_map(struct gdma_resource *r);
+
+void mana_gd_wq_ring_doorbell(struct gdma_context *gc,
+ struct gdma_queue *queue);
+
+int mana_gd_alloc_memory(struct gdma_context *gc, unsigned int length,
+ struct gdma_mem_info *gmi);
+
+void mana_gd_free_memory(struct gdma_mem_info *gmi);
+
+int mana_gd_send_request(struct gdma_context *gc, u32 req_len, const void *req,
+ u32 resp_len, void *resp);
+
+int mana_gd_destroy_dma_region(struct gdma_context *gc,
+ gdma_obj_handle_t dma_region_handle);
+
+#endif /* _GDMA_H */
diff --git a/include/net/mana/hw_channel.h b/include/net/mana/hw_channel.h
new file mode 100644
index 000000000000..6a757a6e2732
--- /dev/null
+++ b/include/net/mana/hw_channel.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _HW_CHANNEL_H
+#define _HW_CHANNEL_H
+
+#define DEFAULT_LOG2_THROTTLING_FOR_ERROR_EQ 4
+
+#define HW_CHANNEL_MAX_REQUEST_SIZE 0x1000
+#define HW_CHANNEL_MAX_RESPONSE_SIZE 0x1000
+
+#define HW_CHANNEL_VF_BOOTSTRAP_QUEUE_DEPTH 1
+
+#define HWC_INIT_DATA_CQID 1
+#define HWC_INIT_DATA_RQID 2
+#define HWC_INIT_DATA_SQID 3
+#define HWC_INIT_DATA_QUEUE_DEPTH 4
+#define HWC_INIT_DATA_MAX_REQUEST 5
+#define HWC_INIT_DATA_MAX_RESPONSE 6
+#define HWC_INIT_DATA_MAX_NUM_CQS 7
+#define HWC_INIT_DATA_PDID 8
+#define HWC_INIT_DATA_GPA_MKEY 9
+#define HWC_INIT_DATA_PF_DEST_RQ_ID 10
+#define HWC_INIT_DATA_PF_DEST_CQ_ID 11
+
+/* Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+union hwc_init_eq_id_db {
+ u32 as_uint32;
+
+ struct {
+ u32 eq_id : 16;
+ u32 doorbell : 16;
+ };
+}; /* HW DATA */
+
+union hwc_init_type_data {
+ u32 as_uint32;
+
+ struct {
+ u32 value : 24;
+ u32 type : 8;
+ };
+}; /* HW DATA */
+
+struct hwc_rx_oob {
+ u32 type : 6;
+ u32 eom : 1;
+ u32 som : 1;
+ u32 vendor_err : 8;
+ u32 reserved1 : 16;
+
+ u32 src_virt_wq : 24;
+ u32 src_vfid : 8;
+
+ u32 reserved2;
+
+ union {
+ u32 wqe_addr_low;
+ u32 wqe_offset;
+ };
+
+ u32 wqe_addr_high;
+
+ u32 client_data_unit : 14;
+ u32 reserved3 : 18;
+
+ u32 tx_oob_data_size;
+
+ u32 chunk_offset : 21;
+ u32 reserved4 : 11;
+}; /* HW DATA */
+
+struct hwc_tx_oob {
+ u32 reserved1;
+
+ u32 reserved2;
+
+ u32 vrq_id : 24;
+ u32 dest_vfid : 8;
+
+ u32 vrcq_id : 24;
+ u32 reserved3 : 8;
+
+ u32 vscq_id : 24;
+ u32 loopback : 1;
+ u32 lso_override: 1;
+ u32 dest_pf : 1;
+ u32 reserved4 : 5;
+
+ u32 vsq_id : 24;
+ u32 reserved5 : 8;
+}; /* HW DATA */
+
+struct hwc_work_request {
+ void *buf_va;
+ void *buf_sge_addr;
+ u32 buf_len;
+ u32 msg_size;
+
+ struct gdma_wqe_request wqe_req;
+ struct hwc_tx_oob tx_oob;
+
+ struct gdma_sge sge;
+};
+
+/* hwc_dma_buf represents the array of in-flight WQEs.
+ * mem_info as know as the GDMA mapped memory is partitioned and used by
+ * in-flight WQEs.
+ * The number of WQEs is determined by the number of in-flight messages.
+ */
+struct hwc_dma_buf {
+ struct gdma_mem_info mem_info;
+
+ u32 gpa_mkey;
+
+ u32 num_reqs;
+ struct hwc_work_request reqs[];
+};
+
+typedef void hwc_rx_event_handler_t(void *ctx, u32 gdma_rxq_id,
+ const struct hwc_rx_oob *rx_oob);
+
+typedef void hwc_tx_event_handler_t(void *ctx, u32 gdma_txq_id,
+ const struct hwc_rx_oob *rx_oob);
+
+struct hwc_cq {
+ struct hw_channel_context *hwc;
+
+ struct gdma_queue *gdma_cq;
+ struct gdma_queue *gdma_eq;
+ struct gdma_comp *comp_buf;
+ u16 queue_depth;
+
+ hwc_rx_event_handler_t *rx_event_handler;
+ void *rx_event_ctx;
+
+ hwc_tx_event_handler_t *tx_event_handler;
+ void *tx_event_ctx;
+};
+
+struct hwc_wq {
+ struct hw_channel_context *hwc;
+
+ struct gdma_queue *gdma_wq;
+ struct hwc_dma_buf *msg_buf;
+ u16 queue_depth;
+
+ struct hwc_cq *hwc_cq;
+};
+
+struct hwc_caller_ctx {
+ struct completion comp_event;
+ void *output_buf;
+ u32 output_buflen;
+
+ u32 error; /* Linux error code */
+ u32 status_code;
+};
+
+struct hw_channel_context {
+ struct gdma_dev *gdma_dev;
+ struct device *dev;
+
+ u16 num_inflight_msg;
+ u32 max_req_msg_size;
+
+ u16 hwc_init_q_depth_max;
+ u32 hwc_init_max_req_msg_size;
+ u32 hwc_init_max_resp_msg_size;
+
+ struct completion hwc_init_eqe_comp;
+
+ struct hwc_wq *rxq;
+ struct hwc_wq *txq;
+ struct hwc_cq *cq;
+
+ struct semaphore sema;
+ struct gdma_resource inflight_msg_res;
+
+ u32 pf_dest_vrq_id;
+ u32 pf_dest_vrcq_id;
+
+ struct hwc_caller_ctx *caller_ctx;
+};
+
+int mana_hwc_create_channel(struct gdma_context *gc);
+void mana_hwc_destroy_channel(struct gdma_context *gc);
+
+int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len,
+ const void *req, u32 resp_len, void *resp);
+
+#endif /* _HW_CHANNEL_H */
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
new file mode 100644
index 000000000000..575ea36ce606
--- /dev/null
+++ b/include/net/mana/mana.h
@@ -0,0 +1,648 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _MANA_H
+#define _MANA_H
+
+#include "gdma.h"
+#include "hw_channel.h"
+
+/* Microsoft Azure Network Adapter (MANA)'s definitions
+ *
+ * Structures labeled with "HW DATA" are exchanged with the hardware. All of
+ * them are naturally aligned and hence don't need __packed.
+ */
+
+/* MANA protocol version */
+#define MANA_MAJOR_VERSION 0
+#define MANA_MINOR_VERSION 1
+#define MANA_MICRO_VERSION 1
+
+typedef u64 mana_handle_t;
+#define INVALID_MANA_HANDLE ((mana_handle_t)-1)
+
+enum TRI_STATE {
+ TRI_STATE_UNKNOWN = -1,
+ TRI_STATE_FALSE = 0,
+ TRI_STATE_TRUE = 1
+};
+
+/* Number of entries for hardware indirection table must be in power of 2 */
+#define MANA_INDIRECT_TABLE_SIZE 64
+#define MANA_INDIRECT_TABLE_MASK (MANA_INDIRECT_TABLE_SIZE - 1)
+
+/* The Toeplitz hash key's length in bytes: should be multiple of 8 */
+#define MANA_HASH_KEY_SIZE 40
+
+#define COMP_ENTRY_SIZE 64
+
+#define ADAPTER_MTU_SIZE 1500
+#define MAX_FRAME_SIZE (ADAPTER_MTU_SIZE + 14)
+
+#define RX_BUFFERS_PER_QUEUE 512
+
+#define MAX_SEND_BUFFERS_PER_QUEUE 256
+
+#define EQ_SIZE (8 * PAGE_SIZE)
+#define LOG2_EQ_THROTTLE 3
+
+#define MAX_PORTS_IN_MANA_DEV 256
+
+struct mana_stats_rx {
+ u64 packets;
+ u64 bytes;
+ u64 xdp_drop;
+ u64 xdp_tx;
+ u64 xdp_redirect;
+ struct u64_stats_sync syncp;
+};
+
+struct mana_stats_tx {
+ u64 packets;
+ u64 bytes;
+ u64 xdp_xmit;
+ struct u64_stats_sync syncp;
+};
+
+struct mana_txq {
+ struct gdma_queue *gdma_sq;
+
+ union {
+ u32 gdma_txq_id;
+ struct {
+ u32 reserved1 : 10;
+ u32 vsq_frame : 14;
+ u32 reserved2 : 8;
+ };
+ };
+
+ u16 vp_offset;
+
+ struct net_device *ndev;
+
+ /* The SKBs are sent to the HW and we are waiting for the CQEs. */
+ struct sk_buff_head pending_skbs;
+ struct netdev_queue *net_txq;
+
+ atomic_t pending_sends;
+
+ struct mana_stats_tx stats;
+};
+
+/* skb data and frags dma mappings */
+struct mana_skb_head {
+ dma_addr_t dma_handle[MAX_SKB_FRAGS + 1];
+
+ u32 size[MAX_SKB_FRAGS + 1];
+};
+
+#define MANA_HEADROOM sizeof(struct mana_skb_head)
+
+enum mana_tx_pkt_format {
+ MANA_SHORT_PKT_FMT = 0,
+ MANA_LONG_PKT_FMT = 1,
+};
+
+struct mana_tx_short_oob {
+ u32 pkt_fmt : 2;
+ u32 is_outer_ipv4 : 1;
+ u32 is_outer_ipv6 : 1;
+ u32 comp_iphdr_csum : 1;
+ u32 comp_tcp_csum : 1;
+ u32 comp_udp_csum : 1;
+ u32 supress_txcqe_gen : 1;
+ u32 vcq_num : 24;
+
+ u32 trans_off : 10; /* Transport header offset */
+ u32 vsq_frame : 14;
+ u32 short_vp_offset : 8;
+}; /* HW DATA */
+
+struct mana_tx_long_oob {
+ u32 is_encap : 1;
+ u32 inner_is_ipv6 : 1;
+ u32 inner_tcp_opt : 1;
+ u32 inject_vlan_pri_tag : 1;
+ u32 reserved1 : 12;
+ u32 pcp : 3; /* 802.1Q */
+ u32 dei : 1; /* 802.1Q */
+ u32 vlan_id : 12; /* 802.1Q */
+
+ u32 inner_frame_offset : 10;
+ u32 inner_ip_rel_offset : 6;
+ u32 long_vp_offset : 12;
+ u32 reserved2 : 4;
+
+ u32 reserved3;
+ u32 reserved4;
+}; /* HW DATA */
+
+struct mana_tx_oob {
+ struct mana_tx_short_oob s_oob;
+ struct mana_tx_long_oob l_oob;
+}; /* HW DATA */
+
+enum mana_cq_type {
+ MANA_CQ_TYPE_RX,
+ MANA_CQ_TYPE_TX,
+};
+
+enum mana_cqe_type {
+ CQE_INVALID = 0,
+ CQE_RX_OKAY = 1,
+ CQE_RX_COALESCED_4 = 2,
+ CQE_RX_OBJECT_FENCE = 3,
+ CQE_RX_TRUNCATED = 4,
+
+ CQE_TX_OKAY = 32,
+ CQE_TX_SA_DROP = 33,
+ CQE_TX_MTU_DROP = 34,
+ CQE_TX_INVALID_OOB = 35,
+ CQE_TX_INVALID_ETH_TYPE = 36,
+ CQE_TX_HDR_PROCESSING_ERROR = 37,
+ CQE_TX_VF_DISABLED = 38,
+ CQE_TX_VPORT_IDX_OUT_OF_RANGE = 39,
+ CQE_TX_VPORT_DISABLED = 40,
+ CQE_TX_VLAN_TAGGING_VIOLATION = 41,
+};
+
+#define MANA_CQE_COMPLETION 1
+
+struct mana_cqe_header {
+ u32 cqe_type : 6;
+ u32 client_type : 2;
+ u32 vendor_err : 24;
+}; /* HW DATA */
+
+/* NDIS HASH Types */
+#define NDIS_HASH_IPV4 BIT(0)
+#define NDIS_HASH_TCP_IPV4 BIT(1)
+#define NDIS_HASH_UDP_IPV4 BIT(2)
+#define NDIS_HASH_IPV6 BIT(3)
+#define NDIS_HASH_TCP_IPV6 BIT(4)
+#define NDIS_HASH_UDP_IPV6 BIT(5)
+#define NDIS_HASH_IPV6_EX BIT(6)
+#define NDIS_HASH_TCP_IPV6_EX BIT(7)
+#define NDIS_HASH_UDP_IPV6_EX BIT(8)
+
+#define MANA_HASH_L3 (NDIS_HASH_IPV4 | NDIS_HASH_IPV6 | NDIS_HASH_IPV6_EX)
+#define MANA_HASH_L4 \
+ (NDIS_HASH_TCP_IPV4 | NDIS_HASH_UDP_IPV4 | NDIS_HASH_TCP_IPV6 | \
+ NDIS_HASH_UDP_IPV6 | NDIS_HASH_TCP_IPV6_EX | NDIS_HASH_UDP_IPV6_EX)
+
+struct mana_rxcomp_perpkt_info {
+ u32 pkt_len : 16;
+ u32 reserved1 : 16;
+ u32 reserved2;
+ u32 pkt_hash;
+}; /* HW DATA */
+
+#define MANA_RXCOMP_OOB_NUM_PPI 4
+
+/* Receive completion OOB */
+struct mana_rxcomp_oob {
+ struct mana_cqe_header cqe_hdr;
+
+ u32 rx_vlan_id : 12;
+ u32 rx_vlantag_present : 1;
+ u32 rx_outer_iphdr_csum_succeed : 1;
+ u32 rx_outer_iphdr_csum_fail : 1;
+ u32 reserved1 : 1;
+ u32 rx_hashtype : 9;
+ u32 rx_iphdr_csum_succeed : 1;
+ u32 rx_iphdr_csum_fail : 1;
+ u32 rx_tcp_csum_succeed : 1;
+ u32 rx_tcp_csum_fail : 1;
+ u32 rx_udp_csum_succeed : 1;
+ u32 rx_udp_csum_fail : 1;
+ u32 reserved2 : 1;
+
+ struct mana_rxcomp_perpkt_info ppi[MANA_RXCOMP_OOB_NUM_PPI];
+
+ u32 rx_wqe_offset;
+}; /* HW DATA */
+
+struct mana_tx_comp_oob {
+ struct mana_cqe_header cqe_hdr;
+
+ u32 tx_data_offset;
+
+ u32 tx_sgl_offset : 5;
+ u32 tx_wqe_offset : 27;
+
+ u32 reserved[12];
+}; /* HW DATA */
+
+struct mana_rxq;
+
+#define CQE_POLLING_BUFFER 512
+
+struct mana_cq {
+ struct gdma_queue *gdma_cq;
+
+ /* Cache the CQ id (used to verify if each CQE comes to the right CQ. */
+ u32 gdma_id;
+
+ /* Type of the CQ: TX or RX */
+ enum mana_cq_type type;
+
+ /* Pointer to the mana_rxq that is pushing RX CQEs to the queue.
+ * Only and must be non-NULL if type is MANA_CQ_TYPE_RX.
+ */
+ struct mana_rxq *rxq;
+
+ /* Pointer to the mana_txq that is pushing TX CQEs to the queue.
+ * Only and must be non-NULL if type is MANA_CQ_TYPE_TX.
+ */
+ struct mana_txq *txq;
+
+ /* Buffer which the CQ handler can copy the CQE's into. */
+ struct gdma_comp gdma_comp_buf[CQE_POLLING_BUFFER];
+
+ /* NAPI data */
+ struct napi_struct napi;
+ int work_done;
+ int budget;
+};
+
+struct mana_recv_buf_oob {
+ /* A valid GDMA work request representing the data buffer. */
+ struct gdma_wqe_request wqe_req;
+
+ void *buf_va;
+ dma_addr_t buf_dma_addr;
+
+ /* SGL of the buffer going to be sent has part of the work request. */
+ u32 num_sge;
+ struct gdma_sge sgl[MAX_RX_WQE_SGL_ENTRIES];
+
+ /* Required to store the result of mana_gd_post_work_request.
+ * gdma_posted_wqe_info.wqe_size_in_bu is required for progressing the
+ * work queue when the WQE is consumed.
+ */
+ struct gdma_posted_wqe_info wqe_inf;
+};
+
+struct mana_rxq {
+ struct gdma_queue *gdma_rq;
+ /* Cache the gdma receive queue id */
+ u32 gdma_id;
+
+ /* Index of RQ in the vPort, not gdma receive queue id */
+ u32 rxq_idx;
+
+ u32 datasize;
+
+ mana_handle_t rxobj;
+
+ struct mana_cq rx_cq;
+
+ struct completion fence_event;
+
+ struct net_device *ndev;
+
+ /* Total number of receive buffers to be allocated */
+ u32 num_rx_buf;
+
+ u32 buf_index;
+
+ struct mana_stats_rx stats;
+
+ struct bpf_prog __rcu *bpf_prog;
+ struct xdp_rxq_info xdp_rxq;
+ struct page *xdp_save_page;
+ bool xdp_flush;
+ int xdp_rc; /* XDP redirect return code */
+
+ /* MUST BE THE LAST MEMBER:
+ * Each receive buffer has an associated mana_recv_buf_oob.
+ */
+ struct mana_recv_buf_oob rx_oobs[];
+};
+
+struct mana_tx_qp {
+ struct mana_txq txq;
+
+ struct mana_cq tx_cq;
+
+ mana_handle_t tx_object;
+};
+
+struct mana_ethtool_stats {
+ u64 stop_queue;
+ u64 wake_queue;
+};
+
+struct mana_context {
+ struct gdma_dev *gdma_dev;
+
+ u16 num_ports;
+
+ struct mana_eq *eqs;
+
+ struct net_device *ports[MAX_PORTS_IN_MANA_DEV];
+};
+
+struct mana_port_context {
+ struct mana_context *ac;
+ struct net_device *ndev;
+
+ u8 mac_addr[ETH_ALEN];
+
+ enum TRI_STATE rss_state;
+
+ mana_handle_t default_rxobj;
+ bool tx_shortform_allowed;
+ u16 tx_vp_offset;
+
+ struct mana_tx_qp *tx_qp;
+
+ /* Indirection Table for RX & TX. The values are queue indexes */
+ u32 indir_table[MANA_INDIRECT_TABLE_SIZE];
+
+ /* Indirection table containing RxObject Handles */
+ mana_handle_t rxobj_table[MANA_INDIRECT_TABLE_SIZE];
+
+ /* Hash key used by the NIC */
+ u8 hashkey[MANA_HASH_KEY_SIZE];
+
+ /* This points to an array of num_queues of RQ pointers. */
+ struct mana_rxq **rxqs;
+
+ struct bpf_prog *bpf_prog;
+
+ /* Create num_queues EQs, SQs, SQ-CQs, RQs and RQ-CQs, respectively. */
+ unsigned int max_queues;
+ unsigned int num_queues;
+
+ mana_handle_t port_handle;
+ mana_handle_t pf_filter_handle;
+
+ /* Mutex for sharing access to vport_use_count */
+ struct mutex vport_mutex;
+ int vport_use_count;
+
+ u16 port_idx;
+
+ bool port_is_up;
+ bool port_st_save; /* Saved port state */
+
+ struct mana_ethtool_stats eth_stats;
+};
+
+netdev_tx_t mana_start_xmit(struct sk_buff *skb, struct net_device *ndev);
+int mana_config_rss(struct mana_port_context *ac, enum TRI_STATE rx,
+ bool update_hash, bool update_tab);
+
+int mana_alloc_queues(struct net_device *ndev);
+int mana_attach(struct net_device *ndev);
+int mana_detach(struct net_device *ndev, bool from_close);
+
+int mana_probe(struct gdma_dev *gd, bool resuming);
+void mana_remove(struct gdma_dev *gd, bool suspending);
+
+void mana_xdp_tx(struct sk_buff *skb, struct net_device *ndev);
+int mana_xdp_xmit(struct net_device *ndev, int n, struct xdp_frame **frames,
+ u32 flags);
+u32 mana_run_xdp(struct net_device *ndev, struct mana_rxq *rxq,
+ struct xdp_buff *xdp, void *buf_va, uint pkt_len);
+struct bpf_prog *mana_xdp_get(struct mana_port_context *apc);
+void mana_chn_setxdp(struct mana_port_context *apc, struct bpf_prog *prog);
+int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf);
+
+extern const struct ethtool_ops mana_ethtool_ops;
+
+struct mana_obj_spec {
+ u32 queue_index;
+ u64 gdma_region;
+ u32 queue_size;
+ u32 attached_eq;
+ u32 modr_ctx_id;
+};
+
+enum mana_command_code {
+ MANA_QUERY_DEV_CONFIG = 0x20001,
+ MANA_QUERY_GF_STAT = 0x20002,
+ MANA_CONFIG_VPORT_TX = 0x20003,
+ MANA_CREATE_WQ_OBJ = 0x20004,
+ MANA_DESTROY_WQ_OBJ = 0x20005,
+ MANA_FENCE_RQ = 0x20006,
+ MANA_CONFIG_VPORT_RX = 0x20007,
+ MANA_QUERY_VPORT_CONFIG = 0x20008,
+
+ /* Privileged commands for the PF mode */
+ MANA_REGISTER_FILTER = 0x28000,
+ MANA_DEREGISTER_FILTER = 0x28001,
+ MANA_REGISTER_HW_PORT = 0x28003,
+ MANA_DEREGISTER_HW_PORT = 0x28004,
+};
+
+/* Query Device Configuration */
+struct mana_query_device_cfg_req {
+ struct gdma_req_hdr hdr;
+
+ /* MANA Nic Driver Capability flags */
+ u64 mn_drv_cap_flags1;
+ u64 mn_drv_cap_flags2;
+ u64 mn_drv_cap_flags3;
+ u64 mn_drv_cap_flags4;
+
+ u32 proto_major_ver;
+ u32 proto_minor_ver;
+ u32 proto_micro_ver;
+
+ u32 reserved;
+}; /* HW DATA */
+
+struct mana_query_device_cfg_resp {
+ struct gdma_resp_hdr hdr;
+
+ u64 pf_cap_flags1;
+ u64 pf_cap_flags2;
+ u64 pf_cap_flags3;
+ u64 pf_cap_flags4;
+
+ u16 max_num_vports;
+ u16 reserved;
+ u32 max_num_eqs;
+}; /* HW DATA */
+
+/* Query vPort Configuration */
+struct mana_query_vport_cfg_req {
+ struct gdma_req_hdr hdr;
+ u32 vport_index;
+}; /* HW DATA */
+
+struct mana_query_vport_cfg_resp {
+ struct gdma_resp_hdr hdr;
+ u32 max_num_sq;
+ u32 max_num_rq;
+ u32 num_indirection_ent;
+ u32 reserved1;
+ u8 mac_addr[6];
+ u8 reserved2[2];
+ mana_handle_t vport;
+}; /* HW DATA */
+
+/* Configure vPort */
+struct mana_config_vport_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ u32 pdid;
+ u32 doorbell_pageid;
+}; /* HW DATA */
+
+struct mana_config_vport_resp {
+ struct gdma_resp_hdr hdr;
+ u16 tx_vport_offset;
+ u8 short_form_allowed;
+ u8 reserved;
+}; /* HW DATA */
+
+/* Create WQ Object */
+struct mana_create_wqobj_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ u32 wq_type;
+ u32 reserved;
+ u64 wq_gdma_region;
+ u64 cq_gdma_region;
+ u32 wq_size;
+ u32 cq_size;
+ u32 cq_moderation_ctx_id;
+ u32 cq_parent_qid;
+}; /* HW DATA */
+
+struct mana_create_wqobj_resp {
+ struct gdma_resp_hdr hdr;
+ u32 wq_id;
+ u32 cq_id;
+ mana_handle_t wq_obj;
+}; /* HW DATA */
+
+/* Destroy WQ Object */
+struct mana_destroy_wqobj_req {
+ struct gdma_req_hdr hdr;
+ u32 wq_type;
+ u32 reserved;
+ mana_handle_t wq_obj_handle;
+}; /* HW DATA */
+
+struct mana_destroy_wqobj_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Fence RQ */
+struct mana_fence_rq_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t wq_obj_handle;
+}; /* HW DATA */
+
+struct mana_fence_rq_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Configure vPort Rx Steering */
+struct mana_cfg_rx_steer_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ u16 num_indir_entries;
+ u16 indir_tab_offset;
+ u32 rx_enable;
+ u32 rss_enable;
+ u8 update_default_rxobj;
+ u8 update_hashkey;
+ u8 update_indir_tab;
+ u8 reserved;
+ mana_handle_t default_rxobj;
+ u8 hashkey[MANA_HASH_KEY_SIZE];
+}; /* HW DATA */
+
+struct mana_cfg_rx_steer_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Register HW vPort */
+struct mana_register_hw_vport_req {
+ struct gdma_req_hdr hdr;
+ u16 attached_gfid;
+ u8 is_pf_default_vport;
+ u8 reserved1;
+ u8 allow_all_ether_types;
+ u8 reserved2;
+ u8 reserved3;
+ u8 reserved4;
+}; /* HW DATA */
+
+struct mana_register_hw_vport_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t hw_vport_handle;
+}; /* HW DATA */
+
+/* Deregister HW vPort */
+struct mana_deregister_hw_vport_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t hw_vport_handle;
+}; /* HW DATA */
+
+struct mana_deregister_hw_vport_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+/* Register filter */
+struct mana_register_filter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t vport;
+ u8 mac_addr[6];
+ u8 reserved1;
+ u8 reserved2;
+ u8 reserved3;
+ u8 reserved4;
+ u16 reserved5;
+ u32 reserved6;
+ u32 reserved7;
+ u32 reserved8;
+}; /* HW DATA */
+
+struct mana_register_filter_resp {
+ struct gdma_resp_hdr hdr;
+ mana_handle_t filter_handle;
+}; /* HW DATA */
+
+/* Deregister filter */
+struct mana_deregister_filter_req {
+ struct gdma_req_hdr hdr;
+ mana_handle_t filter_handle;
+}; /* HW DATA */
+
+struct mana_deregister_filter_resp {
+ struct gdma_resp_hdr hdr;
+}; /* HW DATA */
+
+#define MANA_MAX_NUM_QUEUES 64
+
+#define MANA_SHORT_VPORT_OFFSET_MAX ((1U << 8) - 1)
+
+struct mana_tx_package {
+ struct gdma_wqe_request wqe_req;
+ struct gdma_sge sgl_array[5];
+ struct gdma_sge *sgl_ptr;
+
+ struct mana_tx_oob tx_oob;
+
+ struct gdma_posted_wqe_info wqe_info;
+};
+
+int mana_create_wq_obj(struct mana_port_context *apc,
+ mana_handle_t vport,
+ u32 wq_type, struct mana_obj_spec *wq_spec,
+ struct mana_obj_spec *cq_spec,
+ mana_handle_t *wq_obj);
+
+void mana_destroy_wq_obj(struct mana_port_context *apc, u32 wq_type,
+ mana_handle_t wq_obj);
+
+int mana_cfg_vport(struct mana_port_context *apc, u32 protection_dom_id,
+ u32 doorbell_pg_id);
+void mana_uncfg_vport(struct mana_port_context *apc);
+#endif /* _MANA_H */
diff --git a/include/net/mana/mana_auxiliary.h b/include/net/mana/mana_auxiliary.h
new file mode 100644
index 000000000000..373d59756846
--- /dev/null
+++ b/include/net/mana/mana_auxiliary.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022, Microsoft Corporation. */
+
+#include "mana.h"
+#include <linux/auxiliary_bus.h>
+
+struct mana_adev {
+ struct auxiliary_device adev;
+ struct gdma_dev *mdev;
+};
diff --git a/include/net/mana/shm_channel.h b/include/net/mana/shm_channel.h
new file mode 100644
index 000000000000..5199b41497ff
--- /dev/null
+++ b/include/net/mana/shm_channel.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright (c) 2021, Microsoft Corporation. */
+
+#ifndef _SHM_CHANNEL_H
+#define _SHM_CHANNEL_H
+
+struct shm_channel {
+ struct device *dev;
+ void __iomem *base;
+};
+
+void mana_smc_init(struct shm_channel *sc, struct device *dev,
+ void __iomem *base);
+
+int mana_smc_setup_hwc(struct shm_channel *sc, bool reset_vf, u64 eq_addr,
+ u64 cq_addr, u64 rq_addr, u64 sq_addr,
+ u32 eq_msix_index);
+
+int mana_smc_teardown_hwc(struct shm_channel *sc, bool reset_vf);
+
+#endif /* _SHM_CHANNEL_H */
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 412479ebf5ad..3c5c68618fcc 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -97,8 +97,6 @@ struct mptcp_out_options {
};
#ifdef CONFIG_MPTCP
-extern struct request_sock_ops mptcp_subflow_request_sock_ops;
-
void mptcp_init(void);
static inline bool sk_is_mptcp(const struct sock *sk)
@@ -188,6 +186,9 @@ void mptcp_seq_show(struct seq_file *seq);
int mptcp_subflow_init_cookie_req(struct request_sock *req,
const struct sock *sk_listener,
struct sk_buff *skb);
+struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener,
+ bool attach_listener);
__be32 mptcp_get_reset_option(const struct sk_buff *skb);
@@ -274,6 +275,13 @@ static inline int mptcp_subflow_init_cookie_req(struct request_sock *req,
return 0; /* TCP fallback */
}
+static inline struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *ops,
+ struct sock *sk_listener,
+ bool attach_listener)
+{
+ return NULL;
+}
+
static inline __be32 mptcp_reset_option(const struct sk_buff *skb) { return htonl(0u); }
#endif /* CONFIG_MPTCP */
diff --git a/include/net/mrp.h b/include/net/mrp.h
index 92cd3fb6cf9d..b28915ffea28 100644
--- a/include/net/mrp.h
+++ b/include/net/mrp.h
@@ -124,6 +124,7 @@ struct mrp_applicant {
struct sk_buff *pdu;
struct rb_root mad;
struct rcu_head rcu;
+ bool active;
};
struct mrp_port {
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 8c3587d5c308..78beaa765c73 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -92,7 +92,9 @@ struct net {
struct ns_common ns;
struct ref_tracker_dir refcnt_tracker;
-
+ struct ref_tracker_dir notrefcnt_tracker; /* tracker for objects not
+ * refcounted against netns
+ */
struct list_head dev_base_head;
struct proc_dir_entry *proc_net;
struct proc_dir_entry *proc_net_stat;
@@ -320,19 +322,31 @@ static inline int check_net(const struct net *net)
#endif
-static inline void netns_tracker_alloc(struct net *net,
- netns_tracker *tracker, gfp_t gfp)
+static inline void __netns_tracker_alloc(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted,
+ gfp_t gfp)
{
#ifdef CONFIG_NET_NS_REFCNT_TRACKER
- ref_tracker_alloc(&net->refcnt_tracker, tracker, gfp);
+ ref_tracker_alloc(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker,
+ tracker, gfp);
#endif
}
-static inline void netns_tracker_free(struct net *net,
- netns_tracker *tracker)
+static inline void netns_tracker_alloc(struct net *net, netns_tracker *tracker,
+ gfp_t gfp)
+{
+ __netns_tracker_alloc(net, tracker, true, gfp);
+}
+
+static inline void __netns_tracker_free(struct net *net,
+ netns_tracker *tracker,
+ bool refcounted)
{
#ifdef CONFIG_NET_NS_REFCNT_TRACKER
- ref_tracker_free(&net->refcnt_tracker, tracker);
+ ref_tracker_free(refcounted ? &net->refcnt_tracker :
+ &net->notrefcnt_tracker, tracker);
#endif
}
@@ -346,7 +360,7 @@ static inline struct net *get_net_track(struct net *net,
static inline void put_net_track(struct net *net, netns_tracker *tracker)
{
- netns_tracker_free(net, tracker);
+ __netns_tracker_free(net, tracker, true);
put_net(net);
}
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index b2b9de70d9f4..71d1269fe4d4 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -71,8 +71,7 @@ static inline int nf_conntrack_confirm(struct sk_buff *skb)
return ret;
}
-unsigned int nf_confirm(struct sk_buff *skb, unsigned int protoff,
- struct nf_conn *ct, enum ip_conntrack_info ctinfo);
+unsigned int nf_confirm(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
void print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l4proto *proto);
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 9939c366f720..f30b1694b690 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -115,6 +115,11 @@ struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
gfp_t flags);
+int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, u16 proto);
+int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
+ u8 proto, bool nat, struct nf_conntrack_helper **hp);
+
void nf_ct_helper_destroy(struct nf_conn *ct);
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index e9eb01e99d2f..9877f064548a 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -104,6 +104,10 @@ unsigned int
nf_nat_inet_fn(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state);
+int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, int *action,
+ const struct nf_nat_range2 *range, bool commit);
+
static inline int nf_nat_initialized(const struct nf_conn *ct,
enum nf_nat_manip_type manip)
{
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index cdb7db9b0e25..e69ce23566ea 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -24,6 +24,7 @@ struct module;
enum {
NFT_PKTINFO_L4PROTO = (1 << 0),
NFT_PKTINFO_INNER = (1 << 1),
+ NFT_PKTINFO_INNER_FULL = (1 << 2),
};
struct nft_pktinfo {
@@ -32,8 +33,8 @@ struct nft_pktinfo {
u8 flags;
u8 tprot;
u16 fragoff;
- unsigned int thoff;
- unsigned int inneroff;
+ u16 thoff;
+ u16 inneroff;
};
static inline struct sock *nft_sk(const struct nft_pktinfo *pkt)
@@ -375,10 +376,14 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
return (void *)expr->data;
}
+struct nft_expr_info;
+
+int nft_expr_inner_parse(const struct nft_ctx *ctx, const struct nlattr *nla,
+ struct nft_expr_info *info);
int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src);
void nft_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr);
int nft_expr_dump(struct sk_buff *skb, unsigned int attr,
- const struct nft_expr *expr);
+ const struct nft_expr *expr, bool reset);
bool nft_expr_reduce_bitwise(struct nft_regs_track *track,
const struct nft_expr *expr);
@@ -864,6 +869,7 @@ struct nft_expr_type {
const struct nlattr * const tb[]);
void (*release_ops)(const struct nft_expr_ops *ops);
const struct nft_expr_ops *ops;
+ const struct nft_expr_ops *inner_ops;
struct list_head list;
const char *name;
struct module *owner;
@@ -921,7 +927,8 @@ struct nft_expr_ops {
void (*destroy_clone)(const struct nft_ctx *ctx,
const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
- const struct nft_expr *expr);
+ const struct nft_expr *expr,
+ bool reset);
int (*validate)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data);
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index 1223af68cd9a..3e825381ac5c 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -18,6 +18,8 @@ extern struct nft_expr_type nft_meta_type;
extern struct nft_expr_type nft_rt_type;
extern struct nft_expr_type nft_exthdr_type;
extern struct nft_expr_type nft_last_type;
+extern struct nft_expr_type nft_objref_type;
+extern struct nft_expr_type nft_inner_type;
#ifdef CONFIG_NETWORK_SECMARK
extern struct nft_object_type nft_secmark_obj_type;
@@ -66,16 +68,6 @@ struct nft_payload {
u8 dreg;
};
-struct nft_payload_set {
- enum nft_payload_bases base:8;
- u8 offset;
- u8 len;
- u8 sreg;
- u8 csum_type;
- u8 csum_offset;
- u8 csum_flags;
-};
-
extern const struct nft_expr_ops nft_payload_fast_ops;
extern const struct nft_expr_ops nft_bitwise_fast_ops;
@@ -148,4 +140,28 @@ void nft_rt_get_eval(const struct nft_expr *expr,
struct nft_regs *regs, const struct nft_pktinfo *pkt);
void nft_counter_eval(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt);
+
+enum {
+ NFT_PAYLOAD_CTX_INNER_TUN = (1 << 0),
+ NFT_PAYLOAD_CTX_INNER_LL = (1 << 1),
+ NFT_PAYLOAD_CTX_INNER_NH = (1 << 2),
+ NFT_PAYLOAD_CTX_INNER_TH = (1 << 3),
+};
+
+struct nft_inner_tun_ctx {
+ u16 type;
+ u16 inner_tunoff;
+ u16 inner_lloff;
+ u16 inner_nhoff;
+ u16 inner_thoff;
+ __be16 llproto;
+ u8 l4proto;
+ u8 flags;
+};
+
+int nft_payload_inner_offset(const struct nft_pktinfo *pkt);
+void nft_payload_inner_eval(const struct nft_expr *expr, struct nft_regs *regs,
+ const struct nft_pktinfo *pkt,
+ struct nft_inner_tun_ctx *ctx);
+
#endif /* _NET_NF_TABLES_CORE_H */
diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h
index c4a6147b0ef8..112708f7a6b4 100644
--- a/include/net/netfilter/nf_tables_ipv4.h
+++ b/include/net/netfilter/nf_tables_ipv4.h
@@ -35,6 +35,8 @@ static inline int __nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt)
return -1;
else if (len < thoff)
return -1;
+ else if (thoff < sizeof(*iph))
+ return -1;
pkt->flags = NFT_PKTINFO_L4PROTO;
pkt->tprot = iph->protocol;
@@ -69,6 +71,8 @@ static inline int nft_set_pktinfo_ipv4_ingress(struct nft_pktinfo *pkt)
return -1;
} else if (len < thoff) {
goto inhdr_error;
+ } else if (thoff < sizeof(*iph)) {
+ return -1;
}
pkt->flags = NFT_PKTINFO_L4PROTO;
diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h
index ec7eaeaf4f04..467d59b9e533 100644
--- a/include/net/netfilter/nf_tables_ipv6.h
+++ b/include/net/netfilter/nf_tables_ipv6.h
@@ -13,7 +13,7 @@ static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt)
unsigned short frag_off;
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
- if (protohdr < 0) {
+ if (protohdr < 0 || thoff > U16_MAX) {
nft_set_pktinfo_unspec(pkt);
return;
}
@@ -47,7 +47,7 @@ static inline int __nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt)
return -1;
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
- if (protohdr < 0)
+ if (protohdr < 0 || thoff > U16_MAX)
return -1;
pkt->flags = NFT_PKTINFO_L4PROTO;
@@ -93,7 +93,7 @@ static inline int nft_set_pktinfo_ipv6_ingress(struct nft_pktinfo *pkt)
}
protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, &flags);
- if (protohdr < 0)
+ if (protohdr < 0 || thoff > U16_MAX)
goto inhdr_error;
pkt->flags = NFT_PKTINFO_L4PROTO;
diff --git a/include/net/netfilter/nft_fib.h b/include/net/netfilter/nft_fib.h
index eed099eae672..167640b843ef 100644
--- a/include/net/netfilter/nft_fib.h
+++ b/include/net/netfilter/nft_fib.h
@@ -18,7 +18,7 @@ nft_fib_is_loopback(const struct sk_buff *skb, const struct net_device *in)
return skb->pkt_type == PACKET_LOOPBACK || in->flags & IFF_LOOPBACK;
}
-int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_fib_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset);
int nft_fib_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
const struct nlattr * const tb[]);
int nft_fib_validate(const struct nft_ctx *ctx, const struct nft_expr *expr,
diff --git a/include/net/netfilter/nft_meta.h b/include/net/netfilter/nft_meta.h
index 9b51cc67de54..ba1238f12a48 100644
--- a/include/net/netfilter/nft_meta.h
+++ b/include/net/netfilter/nft_meta.h
@@ -24,10 +24,10 @@ int nft_meta_set_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[]);
int nft_meta_get_dump(struct sk_buff *skb,
- const struct nft_expr *expr);
+ const struct nft_expr *expr, bool reset);
int nft_meta_set_dump(struct sk_buff *skb,
- const struct nft_expr *expr);
+ const struct nft_expr *expr, bool reset);
void nft_meta_get_eval(const struct nft_expr *expr,
struct nft_regs *regs,
@@ -46,4 +46,10 @@ int nft_meta_set_validate(const struct nft_ctx *ctx,
bool nft_meta_get_reduce(struct nft_regs_track *track,
const struct nft_expr *expr);
+
+struct nft_inner_tun_ctx;
+void nft_meta_inner_eval(const struct nft_expr *expr,
+ struct nft_regs *regs, const struct nft_pktinfo *pkt,
+ struct nft_inner_tun_ctx *tun_ctx);
+
#endif
diff --git a/include/net/netfilter/nft_reject.h b/include/net/netfilter/nft_reject.h
index 56b123a42220..6d9ba62efd75 100644
--- a/include/net/netfilter/nft_reject.h
+++ b/include/net/netfilter/nft_reject.h
@@ -22,7 +22,8 @@ int nft_reject_init(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
-int nft_reject_dump(struct sk_buff *skb, const struct nft_expr *expr);
+int nft_reject_dump(struct sk_buff *skb,
+ const struct nft_expr *expr, bool reset);
int nft_reject_icmp_code(u8 code);
int nft_reject_icmpv6_code(u8 code);
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 6bfa972f2fbf..6e1e670e06bc 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -906,6 +906,17 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh)
}
/**
+ * nlmsg_seq - return the seq number of netlink message
+ * @nlh: netlink message header
+ *
+ * Returns 0 if netlink message is NULL
+ */
+static inline u32 nlmsg_seq(const struct nlmsghdr *nlh)
+{
+ return nlh ? nlh->nlmsg_seq : 0;
+}
+
+/**
* nlmsg_for_each_attr - iterate over a stream of attributes
* @pos: loop counter, set to current attribute
* @nlh: netlink message header
@@ -938,6 +949,27 @@ static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 se
}
/**
+ * nlmsg_append - Add more data to a nlmsg in a skb
+ * @skb: socket buffer to store message in
+ * @size: length of message payload
+ *
+ * Append data to an existing nlmsg, used when constructing a message
+ * with multiple fixed-format headers (which is rare).
+ * Returns NULL if the tailroom of the skb is insufficient to store
+ * the extra payload.
+ */
+static inline void *nlmsg_append(struct sk_buff *skb, u32 size)
+{
+ if (unlikely(skb_tailroom(skb) < NLMSG_ALIGN(size)))
+ return NULL;
+
+ if (NLMSG_ALIGN(size) - size)
+ memset(skb_tail_pointer(skb) + size, 0,
+ NLMSG_ALIGN(size) - size);
+ return __skb_put(skb, NLMSG_ALIGN(size));
+}
+
+/**
* nlmsg_put_answer - Add a new callback based netlink message to an skb
* @skb: socket buffer to store message in
* @cb: netlink callback
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 1b8004679445..db762e35aca9 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -43,6 +43,7 @@ struct tcp_fastopen_context;
struct netns_ipv4 {
struct inet_timewait_death_row tcp_death_row;
+ struct udp_table *udp_table;
#ifdef CONFIG_SYSCTL
struct ctl_table_header *forw_hdr;
@@ -183,6 +184,11 @@ struct netns_ipv4 {
unsigned long tfo_active_disable_stamp;
u32 tcp_challenge_timestamp;
u32 tcp_challenge_count;
+ u8 sysctl_tcp_plb_enabled;
+ u8 sysctl_tcp_plb_idle_rehash_rounds;
+ u8 sysctl_tcp_plb_rehash_rounds;
+ u8 sysctl_tcp_plb_suspend_rto_sec;
+ int sysctl_tcp_plb_cong_thresh;
int sysctl_udp_wmem_min;
int sysctl_udp_rmem_min;
@@ -202,6 +208,8 @@ struct netns_ipv4 {
atomic_t dev_addr_genid;
+ unsigned int sysctl_udp_child_hash_entries;
+
#ifdef CONFIG_SYSCTL
unsigned long *sysctl_local_reserved_ports;
int sysctl_ip_prot_sock;
diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h
index a681147aecd8..7eff3d981b89 100644
--- a/include/net/netns/sctp.h
+++ b/include/net/netns/sctp.h
@@ -175,6 +175,10 @@ struct netns_sctp {
/* Threshold for autoclose timeout, in seconds. */
unsigned long max_autoclose;
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+ int l3mdev_accept;
+#endif
};
#endif /* __NETNS_SCTP_H__ */
diff --git a/include/net/netns/xdp.h b/include/net/netns/xdp.h
index e5734261ba0a..21a4f25a187a 100644
--- a/include/net/netns/xdp.h
+++ b/include/net/netns/xdp.h
@@ -2,8 +2,8 @@
#ifndef __NETNS_XDP_H__
#define __NETNS_XDP_H__
-#include <linux/rculist.h>
#include <linux/mutex.h>
+#include <linux/types.h>
struct netns_xdp {
struct mutex lock;
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index f5850b569c52..b79a89d5207c 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -72,6 +72,8 @@ enum nl802154_commands {
NL802154_CMD_NEW_SEC_LEVEL,
NL802154_CMD_DEL_SEC_LEVEL,
+ NL802154_CMD_SCAN_EVENT,
+
/* add new commands above here */
/* used to define NL802154_CMD_MAX below */
@@ -131,6 +133,8 @@ enum nl802154_attrs {
NL802154_ATTR_PID,
NL802154_ATTR_NETNS_FD,
+ NL802154_ATTR_COORDINATOR,
+
/* add attributes here, update the policy in nl802154.c */
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
@@ -217,6 +221,45 @@ enum nl802154_wpan_phy_capability_attr {
};
/**
+ * enum nl802154_coord - Netlink attributes for a coord
+ *
+ * @__NL802154_COORD_INVALID: invalid
+ * @NL802154_COORD_PANID: PANID of the coordinator (2 bytes)
+ * @NL802154_COORD_ADDR: coordinator address, (8 bytes or 2 bytes)
+ * @NL802154_COORD_CHANNEL: channel number, related to @NL802154_COORD_PAGE (u8)
+ * @NL802154_COORD_PAGE: channel page, related to @NL802154_COORD_CHANNEL (u8)
+ * @NL802154_COORD_PREAMBLE_CODE: Preamble code used when the beacon was received,
+ * this is PHY dependent and optional (u8)
+ * @NL802154_COORD_MEAN_PRF: Mean PRF used when the beacon was received,
+ * this is PHY dependent and optional (u8)
+ * @NL802154_COORD_SUPERFRAME_SPEC: superframe specification of the PAN (u16)
+ * @NL802154_COORD_LINK_QUALITY: signal quality of beacon in unspecified units,
+ * scaled to 0..255 (u8)
+ * @NL802154_COORD_GTS_PERMIT: set to true if GTS is permitted on this PAN
+ * @NL802154_COORD_PAYLOAD_DATA: binary data containing the raw data from the
+ * frame payload, (only if beacon or probe response had data)
+ * @NL802154_COORD_PAD: attribute used for padding for 64-bit alignment
+ * @NL802154_COORD_MAX: highest coordinator attribute
+ */
+enum nl802154_coord {
+ __NL802154_COORD_INVALID,
+ NL802154_COORD_PANID,
+ NL802154_COORD_ADDR,
+ NL802154_COORD_CHANNEL,
+ NL802154_COORD_PAGE,
+ NL802154_COORD_PREAMBLE_CODE,
+ NL802154_COORD_MEAN_PRF,
+ NL802154_COORD_SUPERFRAME_SPEC,
+ NL802154_COORD_LINK_QUALITY,
+ NL802154_COORD_GTS_PERMIT,
+ NL802154_COORD_PAYLOAD_DATA,
+ NL802154_COORD_PAD,
+
+ /* keep last */
+ NL802154_COORD_MAX,
+};
+
+/**
* enum nl802154_cca_modes - cca modes
*
* @__NL802154_CCA_INVALID: cca mode number 0 is reserved
diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h
index bf8bb3357825..d9076a7a430c 100644
--- a/include/net/rtnetlink.h
+++ b/include/net/rtnetlink.h
@@ -186,8 +186,9 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
const struct rtnl_link_ops *ops,
struct nlattr *tb[],
struct netlink_ext_ack *extack);
-int rtnl_delete_link(struct net_device *dev);
-int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm);
+int rtnl_delete_link(struct net_device *dev, u32 portid, const struct nlmsghdr *nlh);
+int rtnl_configure_link(struct net_device *dev, const struct ifinfomsg *ifm,
+ u32 portid, const struct nlmsghdr *nlh);
int rtnl_nla_parse_ifla(struct nlattr **tb, const struct nlattr *head, int len,
struct netlink_ext_ack *exterr);
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 5a9bb09f32b6..f514a0aa849e 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -24,7 +24,7 @@
#define __sctp_checksum_h__
#include <linux/types.h>
-#include <net/sctp/sctp.h>
+#include <linux/sctp.h>
#include <linux/crc32c.h>
#include <linux/crc32.h>
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index a04999ee99b0..c335dd01a597 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -67,11 +67,6 @@
#define SCTP_PROTOSW_FLAG INET_PROTOSW_PERMANENT
#endif
-/* Round an int up to the next multiple of 4. */
-#define SCTP_PAD4(s) (((s)+3)&~3)
-/* Truncate to the previous multiple of 4. */
-#define SCTP_TRUNC4(s) ((s)&~3)
-
/*
* Function declarations.
*/
@@ -114,7 +109,7 @@ struct sctp_transport *sctp_transport_get_idx(struct net *net,
struct rhashtable_iter *iter, int pos);
int sctp_transport_lookup_process(sctp_callback_t cb, struct net *net,
const union sctp_addr *laddr,
- const union sctp_addr *paddr, void *p);
+ const union sctp_addr *paddr, void *p, int dif);
int sctp_transport_traverse_process(sctp_callback_t cb, sctp_callback_t cb_done,
struct net *net, int *pos, void *p);
int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p);
@@ -162,10 +157,12 @@ void sctp_unhash_transport(struct sctp_transport *t);
struct sctp_transport *sctp_addrs_lookup_transport(
struct net *net,
const union sctp_addr *laddr,
- const union sctp_addr *paddr);
+ const union sctp_addr *paddr,
+ int dif, int sdif);
struct sctp_transport *sctp_epaddr_lookup_transport(
const struct sctp_endpoint *ep,
const union sctp_addr *paddr);
+bool sctp_sk_bound_dev_eq(struct net *net, int bound_dev_if, int dif, int sdif);
/*
* sctp/proc.c
diff --git a/include/net/sctp/stream_sched.h b/include/net/sctp/stream_sched.h
index 65058faea4db..fa00dc20a0d7 100644
--- a/include/net/sctp/stream_sched.h
+++ b/include/net/sctp/stream_sched.h
@@ -28,8 +28,6 @@ struct sctp_sched_ops {
int (*init_sid)(struct sctp_stream *stream, __u16 sid, gfp_t gfp);
/* free a stream */
void (*free_sid)(struct sctp_stream *stream, __u16 sid);
- /* Frees the entire thing */
- void (*free)(struct sctp_stream *stream);
/* Enqueue a chunk */
void (*enqueue)(struct sctp_outq *q, struct sctp_datamsg *msg);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 350f250b0dc7..afa3781e3ca2 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -477,6 +477,7 @@ struct sctp_af {
int (*available) (union sctp_addr *,
struct sctp_sock *);
int (*skb_iif) (const struct sk_buff *sk);
+ int (*skb_sdif)(const struct sk_buff *sk);
int (*is_ce) (const struct sk_buff *sk);
void (*seq_dump_addr)(struct seq_file *seq,
union sctp_addr *addr);
@@ -1378,10 +1379,12 @@ struct sctp_association *sctp_endpoint_lookup_assoc(
struct sctp_transport **);
bool sctp_endpoint_is_peeled_off(struct sctp_endpoint *ep,
const union sctp_addr *paddr);
-struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *,
- struct net *, const union sctp_addr *);
+struct sctp_endpoint *sctp_endpoint_is_match(struct sctp_endpoint *ep,
+ struct net *net,
+ const union sctp_addr *laddr,
+ int dif, int sdif);
bool sctp_has_association(struct net *net, const union sctp_addr *laddr,
- const union sctp_addr *paddr);
+ const union sctp_addr *paddr, int dif, int sdif);
int sctp_verify_init(struct net *net, const struct sctp_endpoint *ep,
const struct sctp_association *asoc,
diff --git a/include/net/sctp/ulpqueue.h b/include/net/sctp/ulpqueue.h
index 0eaf8650e3b2..60f6641290c3 100644
--- a/include/net/sctp/ulpqueue.h
+++ b/include/net/sctp/ulpqueue.h
@@ -35,8 +35,7 @@ struct sctp_ulpq {
};
/* Prototypes. */
-struct sctp_ulpq *sctp_ulpq_init(struct sctp_ulpq *,
- struct sctp_association *);
+void sctp_ulpq_init(struct sctp_ulpq *ulpq, struct sctp_association *asoc);
void sctp_ulpq_flush(struct sctp_ulpq *ulpq);
void sctp_ulpq_free(struct sctp_ulpq *);
diff --git a/include/net/sock.h b/include/net/sock.h
index e0517ecc6531..ecea3dcc2217 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -503,10 +503,10 @@ struct sock {
#if BITS_PER_LONG==32
seqlock_t sk_stamp_seq;
#endif
- u16 sk_tsflags;
- u8 sk_shutdown;
atomic_t sk_tskey;
atomic_t sk_zckey;
+ u32 sk_tsflags;
+ u8 sk_shutdown;
u8 sk_clockid;
u8 sk_txtime_deadline_mode : 1,
@@ -1899,7 +1899,7 @@ static inline void sock_replace_proto(struct sock *sk, struct proto *proto)
struct sockcm_cookie {
u64 transmit_time;
u32 mark;
- u16 tsflags;
+ u32 tsflags;
};
static inline void sockcm_init(struct sockcm_cookie *sockc,
@@ -1908,7 +1908,7 @@ static inline void sockcm_init(struct sockcm_cookie *sockc,
*sockc = (struct sockcm_cookie) { .tsflags = sk->sk_tsflags };
}
-int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
+int __sock_cmsg_send(struct sock *sk, struct cmsghdr *cmsg,
struct sockcm_cookie *sockc);
int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
struct sockcm_cookie *sockc);
diff --git a/include/net/sock_reuseport.h b/include/net/sock_reuseport.h
index efc9085c6892..6ec140b0a61b 100644
--- a/include/net/sock_reuseport.h
+++ b/include/net/sock_reuseport.h
@@ -16,6 +16,7 @@ struct sock_reuseport {
u16 max_socks; /* length of socks */
u16 num_socks; /* elements in socks */
u16 num_closed_socks; /* closed elements in socks */
+ u16 incoming_cpu;
/* The last synq overflow event timestamp of this
* reuse->socks[] group.
*/
@@ -58,5 +59,6 @@ static inline bool reuseport_has_conns(struct sock *sk)
}
void reuseport_has_conns_set(struct sock *sk);
+void reuseport_update_incoming_cpu(struct sock *sk, int val);
#endif /* _SOCK_REUSEPORT_H */
diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index 7dcdc97c0bc3..ca0312b78294 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -248,6 +248,7 @@ struct switchdev_notifier_fdb_info {
u16 vid;
u8 added_by_user:1,
is_local:1,
+ locked:1,
offloaded:1;
};
diff --git a/include/net/tc_act/tc_ct.h b/include/net/tc_act/tc_ct.h
index 8250d6f0a462..b24ea2d9400b 100644
--- a/include/net/tc_act/tc_ct.h
+++ b/include/net/tc_act/tc_ct.h
@@ -10,6 +10,7 @@
#include <net/netfilter/nf_conntrack_labels.h>
struct tcf_ct_params {
+ struct nf_conntrack_helper *helper;
struct nf_conn *tmpl;
u16 zone;
diff --git a/include/net/tc_act/tc_skbedit.h b/include/net/tc_act/tc_skbedit.h
index dc1079f28e13..9649600fb3dc 100644
--- a/include/net/tc_act/tc_skbedit.h
+++ b/include/net/tc_act/tc_skbedit.h
@@ -95,12 +95,41 @@ static inline u32 tcf_skbedit_priority(const struct tc_action *a)
return priority;
}
+static inline u16 tcf_skbedit_rx_queue_mapping(const struct tc_action *a)
+{
+ u16 rx_queue;
+
+ rcu_read_lock();
+ rx_queue = rcu_dereference(to_skbedit(a)->params)->queue_mapping;
+ rcu_read_unlock();
+
+ return rx_queue;
+}
+
/* Return true iff action is queue_mapping */
static inline bool is_tcf_skbedit_queue_mapping(const struct tc_action *a)
{
return is_tcf_skbedit_with_flag(a, SKBEDIT_F_QUEUE_MAPPING);
}
+/* Return true if action is on ingress traffic */
+static inline bool is_tcf_skbedit_ingress(u32 flags)
+{
+ return flags & TCA_ACT_FLAGS_AT_INGRESS;
+}
+
+static inline bool is_tcf_skbedit_tx_queue_mapping(const struct tc_action *a)
+{
+ return is_tcf_skbedit_queue_mapping(a) &&
+ !is_tcf_skbedit_ingress(a->tcfa_flags);
+}
+
+static inline bool is_tcf_skbedit_rx_queue_mapping(const struct tc_action *a)
+{
+ return is_tcf_skbedit_queue_mapping(a) &&
+ is_tcf_skbedit_ingress(a->tcfa_flags);
+}
+
/* Return true iff action is inheritdsfield */
static inline bool is_tcf_skbedit_inheritdsfield(const struct tc_action *a)
{
diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
new file mode 100644
index 000000000000..ceed2fc089ff
--- /dev/null
+++ b/include/net/tc_wrapper.h
@@ -0,0 +1,251 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NET_TC_WRAPPER_H
+#define __NET_TC_WRAPPER_H
+
+#include <net/pkt_cls.h>
+
+#if IS_ENABLED(CONFIG_RETPOLINE)
+
+#include <linux/cpufeature.h>
+#include <linux/static_key.h>
+#include <linux/indirect_call_wrapper.h>
+
+#define TC_INDIRECT_SCOPE
+
+extern struct static_key_false tc_skip_wrapper;
+
+/* TC Actions */
+#ifdef CONFIG_NET_CLS_ACT
+
+#define TC_INDIRECT_ACTION_DECLARE(fname) \
+ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
+ const struct tc_action *a, \
+ struct tcf_result *res))
+
+TC_INDIRECT_ACTION_DECLARE(tcf_bpf_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_connmark_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_csum_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ct_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ctinfo_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gact_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_gate_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ife_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_ipt_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mirred_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_mpls_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_nat_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_pedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_police_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_sample_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_simp_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbedit_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_skbmod_act);
+TC_INDIRECT_ACTION_DECLARE(tcf_vlan_act);
+TC_INDIRECT_ACTION_DECLARE(tunnel_key_act);
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ if (static_branch_likely(&tc_skip_wrapper))
+ goto skip;
+
+#if IS_BUILTIN(CONFIG_NET_ACT_GACT)
+ if (a->ops->act == tcf_gact_act)
+ return tcf_gact_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MIRRED)
+ if (a->ops->act == tcf_mirred_act)
+ return tcf_mirred_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_PEDIT)
+ if (a->ops->act == tcf_pedit_act)
+ return tcf_pedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBEDIT)
+ if (a->ops->act == tcf_skbedit_act)
+ return tcf_skbedit_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SKBMOD)
+ if (a->ops->act == tcf_skbmod_act)
+ return tcf_skbmod_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_POLICE)
+ if (a->ops->act == tcf_police_act)
+ return tcf_police_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_BPF)
+ if (a->ops->act == tcf_bpf_act)
+ return tcf_bpf_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CONNMARK)
+ if (a->ops->act == tcf_connmark_act)
+ return tcf_connmark_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CSUM)
+ if (a->ops->act == tcf_csum_act)
+ return tcf_csum_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CT)
+ if (a->ops->act == tcf_ct_act)
+ return tcf_ct_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_CTINFO)
+ if (a->ops->act == tcf_ctinfo_act)
+ return tcf_ctinfo_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_GATE)
+ if (a->ops->act == tcf_gate_act)
+ return tcf_gate_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_MPLS)
+ if (a->ops->act == tcf_mpls_act)
+ return tcf_mpls_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_NAT)
+ if (a->ops->act == tcf_nat_act)
+ return tcf_nat_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_TUNNEL_KEY)
+ if (a->ops->act == tunnel_key_act)
+ return tunnel_key_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_VLAN)
+ if (a->ops->act == tcf_vlan_act)
+ return tcf_vlan_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IFE)
+ if (a->ops->act == tcf_ife_act)
+ return tcf_ife_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_IPT)
+ if (a->ops->act == tcf_ipt_act)
+ return tcf_ipt_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SIMP)
+ if (a->ops->act == tcf_simp_act)
+ return tcf_simp_act(skb, a, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_ACT_SAMPLE)
+ if (a->ops->act == tcf_sample_act)
+ return tcf_sample_act(skb, a, res);
+#endif
+
+skip:
+ return a->ops->act(skb, a, res);
+}
+
+#endif /* CONFIG_NET_CLS_ACT */
+
+/* TC Filters */
+#ifdef CONFIG_NET_CLS
+
+#define TC_INDIRECT_FILTER_DECLARE(fname) \
+ INDIRECT_CALLABLE_DECLARE(int fname(struct sk_buff *skb, \
+ const struct tcf_proto *tp, \
+ struct tcf_result *res))
+
+TC_INDIRECT_FILTER_DECLARE(basic_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_bpf_classify);
+TC_INDIRECT_FILTER_DECLARE(cls_cgroup_classify);
+TC_INDIRECT_FILTER_DECLARE(fl_classify);
+TC_INDIRECT_FILTER_DECLARE(flow_classify);
+TC_INDIRECT_FILTER_DECLARE(fw_classify);
+TC_INDIRECT_FILTER_DECLARE(mall_classify);
+TC_INDIRECT_FILTER_DECLARE(route4_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp_classify);
+TC_INDIRECT_FILTER_DECLARE(rsvp6_classify);
+TC_INDIRECT_FILTER_DECLARE(tcindex_classify);
+TC_INDIRECT_FILTER_DECLARE(u32_classify);
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ if (static_branch_likely(&tc_skip_wrapper))
+ goto skip;
+
+#if IS_BUILTIN(CONFIG_NET_CLS_BPF)
+ if (tp->classify == cls_bpf_classify)
+ return cls_bpf_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_U32)
+ if (tp->classify == u32_classify)
+ return u32_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOWER)
+ if (tp->classify == fl_classify)
+ return fl_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FW)
+ if (tp->classify == fw_classify)
+ return fw_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_MATCHALL)
+ if (tp->classify == mall_classify)
+ return mall_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_BASIC)
+ if (tp->classify == basic_classify)
+ return basic_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_CGROUP)
+ if (tp->classify == cls_cgroup_classify)
+ return cls_cgroup_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_FLOW)
+ if (tp->classify == flow_classify)
+ return flow_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_ROUTE4)
+ if (tp->classify == route4_classify)
+ return route4_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP)
+ if (tp->classify == rsvp_classify)
+ return rsvp_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_RSVP6)
+ if (tp->classify == rsvp6_classify)
+ return rsvp6_classify(skb, tp, res);
+#endif
+#if IS_BUILTIN(CONFIG_NET_CLS_TCINDEX)
+ if (tp->classify == tcindex_classify)
+ return tcindex_classify(skb, tp, res);
+#endif
+
+skip:
+ return tp->classify(skb, tp, res);
+}
+
+static inline void tc_wrapper_init(void)
+{
+#ifdef CONFIG_X86
+ if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ static_branch_enable(&tc_skip_wrapper);
+#endif
+}
+
+#endif /* CONFIG_NET_CLS */
+
+#else
+
+#define TC_INDIRECT_SCOPE static
+
+static inline int tc_act(struct sk_buff *skb, const struct tc_action *a,
+ struct tcf_result *res)
+{
+ return a->ops->act(skb, a, res);
+}
+
+static inline int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+ struct tcf_result *res)
+{
+ return tp->classify(skb, tp, res);
+}
+
+static inline void tc_wrapper_init(void)
+{
+}
+
+#endif
+
+#endif /* __NET_TC_WRAPPER_H */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 14d45661a84d..db9f828e9d1e 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1675,7 +1675,11 @@ int tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key,
const struct sock *sk, const struct sk_buff *skb);
int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index, u8 flags,
- const u8 *newkey, u8 newkeylen, gfp_t gfp);
+ const u8 *newkey, u8 newkeylen);
+int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr,
+ int family, u8 prefixlen, int l3index,
+ struct tcp_md5sig_key *key);
+
int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr,
int family, u8 prefixlen, int l3index, u8 flags);
struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
@@ -1683,7 +1687,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk,
#ifdef CONFIG_TCP_MD5SIG
#include <linux/jump_label.h>
-extern struct static_key_false tcp_md5_needed;
+extern struct static_key_false_deferred tcp_md5_needed;
struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr,
int family);
@@ -1691,7 +1695,7 @@ static inline struct tcp_md5sig_key *
tcp_md5_do_lookup(const struct sock *sk, int l3index,
const union tcp_md5_addr *addr, int family)
{
- if (!static_branch_unlikely(&tcp_md5_needed))
+ if (!static_branch_unlikely(&tcp_md5_needed.key))
return NULL;
return __tcp_md5_do_lookup(sk, l3index, addr, family);
}
@@ -2140,6 +2144,34 @@ extern void tcp_rack_advance(struct tcp_sock *tp, u8 sacked, u32 end_seq,
extern void tcp_rack_reo_timeout(struct sock *sk);
extern void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs);
+/* tcp_plb.c */
+
+/*
+ * Scaling factor for fractions in PLB. For example, tcp_plb_update_state
+ * expects cong_ratio which represents fraction of traffic that experienced
+ * congestion over a single RTT. In order to avoid floating point operations,
+ * this fraction should be mapped to (1 << TCP_PLB_SCALE) and passed in.
+ */
+#define TCP_PLB_SCALE 8
+
+/* State for PLB (Protective Load Balancing) for a single TCP connection. */
+struct tcp_plb_state {
+ u8 consec_cong_rounds:5, /* consecutive congested rounds */
+ unused:3;
+ u32 pause_until; /* jiffies32 when PLB can resume rerouting */
+};
+
+static inline void tcp_plb_init(const struct sock *sk,
+ struct tcp_plb_state *plb)
+{
+ plb->consec_cong_rounds = 0;
+ plb->pause_until = 0;
+}
+void tcp_plb_update_state(const struct sock *sk, struct tcp_plb_state *plb,
+ const int cong_ratio);
+void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb);
+void tcp_plb_update_state_upon_rto(struct sock *sk, struct tcp_plb_state *plb);
+
/* At how many usecs into the future should the RTO fire? */
static inline s64 tcp_rto_delta_us(const struct sock *sk)
{
@@ -2291,8 +2323,8 @@ int tcp_bpf_update_proto(struct sock *sk, struct sk_psock *psock, bool restore);
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk);
#endif /* CONFIG_BPF_SYSCALL */
-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
- int flags);
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+ struct sk_msg *msg, u32 bytes, int flags);
#endif /* CONFIG_NET_SOCK_MSG */
#if !defined(CONFIG_BPF_SYSCALL) || !defined(CONFIG_NET_SOCK_MSG)
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index b830463e3dff..d27b1caf3753 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -58,8 +58,6 @@ ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp, __u16 srcp,
#define LOOPBACK4_IPV6 cpu_to_be32(0x7f000006)
-void inet6_destroy_sock(struct sock *sk);
-
#define IPV6_SEQ_DGRAM_HEADER \
" sl " \
"local_address " \
diff --git a/include/net/tso.h b/include/net/tso.h
index 62c98a9c60f1..e7e157ae0526 100644
--- a/include/net/tso.h
+++ b/include/net/tso.h
@@ -2,6 +2,7 @@
#ifndef _TSO_H
#define _TSO_H
+#include <linux/skbuff.h>
#include <net/ip.h>
#define TSO_HEADER_SIZE 256
@@ -16,7 +17,12 @@ struct tso_t {
u32 tcp_seq;
};
-int tso_count_descs(const struct sk_buff *skb);
+/* Calculate the worst case buffer count */
+static inline int tso_count_descs(const struct sk_buff *skb)
+{
+ return skb_shinfo(skb)->gso_segs * 2 + skb_shinfo(skb)->nr_frags;
+}
+
void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso,
int size, bool is_last);
void tso_build_data(const struct sk_buff *skb, struct tso_t *tso, int size);
diff --git a/include/net/udp.h b/include/net/udp.h
index fee053bcd17c..de4b528522bb 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -174,6 +174,15 @@ INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
netdev_features_t features, bool is_ipv6);
+static inline void udp_lib_init_sock(struct sock *sk)
+{
+ struct udp_sock *up = udp_sk(sk);
+
+ skb_queue_head_init(&up->reader_queue);
+ up->forward_threshold = sk->sk_rcvbuf >> 2;
+ set_bit(SOCK_CUSTOM_SOCKOPT, &sk->sk_socket->flags);
+}
+
/* hash routines shared between UDPv4/6 and UDP-Litev4/6 */
static inline int udp_lib_hash(struct sock *sk)
{
diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h
index 72394f441dad..0ca9b7a11baf 100644
--- a/include/net/udp_tunnel.h
+++ b/include/net/udp_tunnel.h
@@ -68,8 +68,8 @@ typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb);
typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk,
struct sk_buff *skb);
typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk,
- struct sk_buff *skb,
- unsigned int udp_offset);
+ struct sk_buff *skb, int err,
+ __be16 port, u32 info, u8 *payload);
typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk);
typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk,
struct list_head *head,
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index dbc81f5eb553..3e1f70e8e424 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -129,6 +129,13 @@ struct xfrm_state_walk {
enum {
XFRM_DEV_OFFLOAD_IN = 1,
XFRM_DEV_OFFLOAD_OUT,
+ XFRM_DEV_OFFLOAD_FWD,
+};
+
+enum {
+ XFRM_DEV_OFFLOAD_UNSPECIFIED,
+ XFRM_DEV_OFFLOAD_CRYPTO,
+ XFRM_DEV_OFFLOAD_PACKET,
};
struct xfrm_dev_offload {
@@ -137,6 +144,7 @@ struct xfrm_dev_offload {
struct net_device *real_dev;
unsigned long offload_handle;
u8 dir : 2;
+ u8 type : 2;
};
struct xfrm_mode {
@@ -534,6 +542,8 @@ struct xfrm_policy {
struct xfrm_tmpl xfrm_vec[XFRM_MAX_DEPTH];
struct hlist_node bydst_inexact_list;
struct rcu_head rcu;
+
+ struct xfrm_dev_offload xdo;
};
static inline struct net *xp_net(const struct xfrm_policy *xp)
@@ -1093,6 +1103,29 @@ xfrm_state_addr_cmp(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x, un
}
#ifdef CONFIG_XFRM
+static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
+{
+ struct sec_path *sp = skb_sec_path(skb);
+
+ return sp->xvec[sp->len - 1];
+}
+#endif
+
+static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
+{
+#ifdef CONFIG_XFRM
+ struct sec_path *sp = skb_sec_path(skb);
+
+ if (!sp || !sp->olen || sp->len != sp->olen)
+ return NULL;
+
+ return &sp->ovec[sp->olen - 1];
+#else
+ return NULL;
+#endif
+}
+
+#ifdef CONFIG_XFRM
int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb,
unsigned short family);
@@ -1123,10 +1156,19 @@ static inline int __xfrm_policy_check2(struct sock *sk, int dir,
{
struct net *net = dev_net(skb->dev);
int ndir = dir | (reverse ? XFRM_POLICY_MASK + 1 : 0);
+ struct xfrm_offload *xo = xfrm_offload(skb);
+ struct xfrm_state *x;
if (sk && sk->sk_policy[XFRM_POLICY_IN])
return __xfrm_policy_check(sk, ndir, skb, family);
+ if (xo) {
+ x = xfrm_input_state(skb);
+ if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET)
+ return (xo->flags & CRYPTO_DONE) &&
+ (xo->status & CRYPTO_SUCCESS);
+ }
+
return __xfrm_check_nopolicy(net, skb, dir) ||
__xfrm_check_dev_nopolicy(skb, dir, family) ||
__xfrm_policy_check(sk, ndir, skb, family);
@@ -1529,6 +1571,23 @@ struct xfrm_state *xfrm_stateonly_find(struct net *net, u32 mark, u32 if_id,
struct xfrm_state *xfrm_state_lookup_byspi(struct net *net, __be32 spi,
unsigned short family);
int xfrm_state_check_expire(struct xfrm_state *x);
+#ifdef CONFIG_XFRM_OFFLOAD
+static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x)
+{
+ struct xfrm_dev_offload *xdo = &x->xso;
+ struct net_device *dev = xdo->dev;
+
+ if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
+ return;
+
+ if (dev && dev->xfrmdev_ops &&
+ dev->xfrmdev_ops->xdo_dev_state_update_curlft)
+ dev->xfrmdev_ops->xdo_dev_state_update_curlft(x);
+
+}
+#else
+static inline void xfrm_dev_state_update_curlft(struct xfrm_state *x) {}
+#endif
void xfrm_state_insert(struct xfrm_state *x);
int xfrm_state_add(struct xfrm_state *x);
int xfrm_state_update(struct xfrm_state *x);
@@ -1578,6 +1637,8 @@ struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
int xfrm_state_delete(struct xfrm_state *x);
int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
+int xfrm_dev_policy_flush(struct net *net, struct net_device *dev,
+ bool task_valid);
void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
u32 xfrm_replay_seqhi(struct xfrm_state *x, __be32 net_seq);
@@ -1681,8 +1742,9 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net,
int xfrm_policy_flush(struct net *net, u8 type, bool task_valid);
void xfrm_policy_hash_rebuild(struct net *net);
u32 xfrm_get_acqseq(void);
-int verify_spi_info(u8 proto, u32 min, u32 max);
-int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
+int verify_spi_info(u8 proto, u32 min, u32 max, struct netlink_ext_ack *extack);
+int xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi,
+ struct netlink_ext_ack *extack);
struct xfrm_state *xfrm_find_acq(struct net *net, const struct xfrm_mark *mark,
u8 mode, u32 reqid, u32 if_id, u8 proto,
const xfrm_address_t *daddr,
@@ -1703,7 +1765,8 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
- struct xfrm_encap_tmpl *encap, u32 if_id);
+ struct xfrm_encap_tmpl *encap, u32 if_id,
+ struct netlink_ext_ack *extack);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);
@@ -1858,29 +1921,6 @@ static inline void xfrm_states_delete(struct xfrm_state **states, int n)
}
#endif
-#ifdef CONFIG_XFRM
-static inline struct xfrm_state *xfrm_input_state(struct sk_buff *skb)
-{
- struct sec_path *sp = skb_sec_path(skb);
-
- return sp->xvec[sp->len - 1];
-}
-#endif
-
-static inline struct xfrm_offload *xfrm_offload(struct sk_buff *skb)
-{
-#ifdef CONFIG_XFRM
- struct sec_path *sp = skb_sec_path(skb);
-
- if (!sp || !sp->olen || sp->len != sp->olen)
- return NULL;
-
- return &sp->ovec[sp->olen - 1];
-#else
- return NULL;
-#endif
-}
-
void __init xfrm_dev_init(void);
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1890,6 +1930,9 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur
int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
struct xfrm_user_offload *xuo,
struct netlink_ext_ack *extack);
+int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
+ struct xfrm_user_offload *xuo, u8 dir,
+ struct netlink_ext_ack *extack);
bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x);
static inline void xfrm_dev_state_advance_esn(struct xfrm_state *x)
@@ -1938,6 +1981,28 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
netdev_put(dev, &xso->dev_tracker);
}
}
+
+static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
+{
+ struct xfrm_dev_offload *xdo = &x->xdo;
+ struct net_device *dev = xdo->dev;
+
+ if (dev && dev->xfrmdev_ops && dev->xfrmdev_ops->xdo_dev_policy_delete)
+ dev->xfrmdev_ops->xdo_dev_policy_delete(x);
+}
+
+static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
+{
+ struct xfrm_dev_offload *xdo = &x->xdo;
+ struct net_device *dev = xdo->dev;
+
+ if (dev && dev->xfrmdev_ops) {
+ if (dev->xfrmdev_ops->xdo_dev_policy_free)
+ dev->xfrmdev_ops->xdo_dev_policy_free(x);
+ xdo->dev = NULL;
+ netdev_put(dev, &xdo->dev_tracker);
+ }
+}
#else
static inline void xfrm_dev_resume(struct sk_buff *skb)
{
@@ -1965,6 +2030,21 @@ static inline void xfrm_dev_state_free(struct xfrm_state *x)
{
}
+static inline int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
+ struct xfrm_user_offload *xuo, u8 dir,
+ struct netlink_ext_ack *extack)
+{
+ return 0;
+}
+
+static inline void xfrm_dev_policy_delete(struct xfrm_policy *x)
+{
+}
+
+static inline void xfrm_dev_policy_free(struct xfrm_policy *x)
+{
+}
+
static inline bool xfrm_dev_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
{
return false;
@@ -2084,4 +2164,21 @@ static inline bool xfrm6_local_dontfrag(const struct sock *sk)
return false;
}
#endif
+
+#if (IS_BUILTIN(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
+ (IS_MODULE(CONFIG_XFRM_INTERFACE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
+
+extern struct metadata_dst __percpu *xfrm_bpf_md_dst;
+
+int register_xfrm_interface_bpf(void);
+
+#else
+
+static inline int register_xfrm_interface_bpf(void)
+{
+ return 0;
+}
+
+#endif
+
#endif /* _NET_XFRM_H */