summaryrefslogtreecommitdiff
path: root/include/linux/skbuff.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-12-27 13:04:52 -0800
commite0c38a4d1f196a4b17d2eba36afff8f656a4f1de (patch)
treeb26a69fabef0160adb127416a9744217700feeb7 /include/linux/skbuff.h
parent7f9f852c75e7d776b078813586c76a2bc7dca993 (diff)
parent90cadbbf341dd5b2df991c33a6bd6341f3a53788 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: 1) New ipset extensions for matching on destination MAC addresses, from Stefano Brivio. 2) Add ipv4 ttl and tos, plus ipv6 flow label and hop limit offloads to nfp driver. From Stefano Brivio. 3) Implement GRO for plain UDP sockets, from Paolo Abeni. 4) Lots of work from Michał Mirosław to eliminate the VLAN_TAG_PRESENT bit so that we could support the entire vlan_tci value. 5) Rework the IPSEC policy lookups to better optimize more usecases, from Florian Westphal. 6) Infrastructure changes eliminating direct manipulation of SKB lists wherever possible, and to always use the appropriate SKB list helpers. This work is still ongoing... 7) Lots of PHY driver and state machine improvements and simplifications, from Heiner Kallweit. 8) Various TSO deferral refinements, from Eric Dumazet. 9) Add ntuple filter support to aquantia driver, from Dmitry Bogdanov. 10) Batch dropping of XDP packets in tuntap, from Jason Wang. 11) Lots of cleanups and improvements to the r8169 driver from Heiner Kallweit, including support for ->xmit_more. This driver has been getting some much needed love since he started working on it. 12) Lots of new forwarding selftests from Petr Machata. 13) Enable VXLAN learning in mlxsw driver, from Ido Schimmel. 14) Packed ring support for virtio, from Tiwei Bie. 15) Add new Aquantia AQtion USB driver, from Dmitry Bezrukov. 16) Add XDP support to dpaa2-eth driver, from Ioana Ciocoi Radulescu. 17) Implement coalescing on TCP backlog queue, from Eric Dumazet. 18) Implement carrier change in tun driver, from Nicolas Dichtel. 19) Support msg_zerocopy in UDP, from Willem de Bruijn. 20) Significantly improve garbage collection of neighbor objects when the table has many PERMANENT entries, from David Ahern. 21) Remove egdev usage from nfp and mlx5, and remove the facility completely from the tree as it no longer has any users. From Oz Shlomo and others. 22) Add a NETDEV_PRE_CHANGEADDR so that drivers can veto the change and therefore abort the operation before the commit phase (which is the NETDEV_CHANGEADDR event). From Petr Machata. 23) Add indirect call wrappers to avoid retpoline overhead, and use them in the GRO code paths. From Paolo Abeni. 24) Add support for netlink FDB get operations, from Roopa Prabhu. 25) Support bloom filter in mlxsw driver, from Nir Dotan. 26) Add SKB extension infrastructure. This consolidates the handling of the auxiliary SKB data used by IPSEC and bridge netfilter, and is designed to support the needs to MPTCP which could be integrated in the future. 27) Lots of XDP TX optimizations in mlx5 from Tariq Toukan. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1845 commits) net: dccp: fix kernel crash on module load drivers/net: appletalk/cops: remove redundant if statement and mask bnx2x: Fix NULL pointer dereference in bnx2x_del_all_vlans() on some hw net/net_namespace: Check the return value of register_pernet_subsys() net/netlink_compat: Fix a missing check of nla_parse_nested ieee802154: lowpan_header_create check must check daddr net/mlx4_core: drop useless LIST_HEAD mlxsw: spectrum: drop useless LIST_HEAD net/mlx5e: drop useless LIST_HEAD iptunnel: Set tun_flags in the iptunnel_metadata_reply from src net/mlx5e: fix semicolon.cocci warnings staging: octeon: fix build failure with XFRM enabled net: Revert recent Spectre-v1 patches. can: af_can: Fix Spectre v1 vulnerability packet: validate address length if non-zero nfc: af_nfc: Fix Spectre v1 vulnerability phonet: af_phonet: Fix Spectre v1 vulnerability net: core: Fix Spectre v1 vulnerability net: minor cleanup in skb_ext_add() net: drop the unused helper skb_ext_get() ...
Diffstat (limited to 'include/linux/skbuff.h')
-rw-r--r--include/linux/skbuff.h175
1 files changed, 133 insertions, 42 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0d1b2c3f127b..2a57a365c711 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -245,6 +245,7 @@ struct iov_iter;
struct napi_struct;
struct bpf_prog;
union bpf_attr;
+struct skb_ext;
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
struct nf_conntrack {
@@ -254,7 +255,6 @@ struct nf_conntrack {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
struct nf_bridge_info {
- refcount_t use;
enum {
BRNF_PROTO_UNCHANGED,
BRNF_PROTO_8021Q,
@@ -481,10 +481,11 @@ static inline void sock_zerocopy_get(struct ubuf_info *uarg)
}
void sock_zerocopy_put(struct ubuf_info *uarg);
-void sock_zerocopy_put_abort(struct ubuf_info *uarg);
+void sock_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref);
void sock_zerocopy_callback(struct ubuf_info *uarg, bool success);
+int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len);
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg);
@@ -615,6 +616,8 @@ typedef unsigned char *sk_buff_data_t;
* @pkt_type: Packet class
* @fclone: skbuff clone status
* @ipvs_property: skbuff is owned by ipvs
+ * @offload_fwd_mark: Packet was L2-forwarded in hardware
+ * @offload_l3_fwd_mark: Packet was L3-forwarded in hardware
* @tc_skip_classify: do not classify packet. set by IFB device
* @tc_at_ingress: used within tc_classify to distinguish in/egress
* @tc_redirected: packet was redirected by a tc action
@@ -633,6 +636,7 @@ typedef unsigned char *sk_buff_data_t;
* @queue_mapping: Queue mapping for multiqueue devices
* @xmit_more: More SKBs are pending for this queue
* @pfmemalloc: skbuff was allocated from PFMEMALLOC reserves
+ * @active_extensions: active extensions (skb_ext_id types)
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
* @l4_hash: indicate hash is a canonical 4-tuple hash over transport
@@ -662,6 +666,7 @@ typedef unsigned char *sk_buff_data_t;
* @data: Data head pointer
* @truesize: Buffer size
* @users: User count - see {datagram,tcp}.c
+ * @extensions: allocated extensions, valid if active_extensions is nonzero
*/
struct sk_buff {
@@ -709,15 +714,9 @@ struct sk_buff {
struct list_head tcp_tsorted_anchor;
};
-#ifdef CONFIG_XFRM
- struct sec_path *sp;
-#endif
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
unsigned long _nfct;
#endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- struct nf_bridge_info *nf_bridge;
-#endif
unsigned int len,
data_len;
__u16 mac_len,
@@ -744,7 +743,9 @@ struct sk_buff {
head_frag:1,
xmit_more:1,
pfmemalloc:1;
-
+#ifdef CONFIG_SKB_EXTENSIONS
+ __u8 active_extensions;
+#endif
/* fields enclosed in headers_start/headers_end are copied
* using a single memcpy() in __copy_skb_header()
*/
@@ -777,6 +778,14 @@ struct sk_buff {
__u8 encap_hdr_csum:1;
__u8 csum_valid:1;
+#ifdef __BIG_ENDIAN_BITFIELD
+#define PKT_VLAN_PRESENT_BIT 7
+#else
+#define PKT_VLAN_PRESENT_BIT 0
+#endif
+#define PKT_VLAN_PRESENT_OFFSET() offsetof(struct sk_buff, __pkt_vlan_present_offset)
+ __u8 __pkt_vlan_present_offset[0];
+ __u8 vlan_present:1;
__u8 csum_complete_sw:1;
__u8 csum_level:2;
__u8 csum_not_inet:1;
@@ -784,13 +793,13 @@ struct sk_buff {
#ifdef CONFIG_IPV6_NDISC_NODETYPE
__u8 ndisc_nodetype:2;
#endif
- __u8 ipvs_property:1;
+ __u8 ipvs_property:1;
__u8 inner_protocol_type:1;
__u8 remcsum_offload:1;
#ifdef CONFIG_NET_SWITCHDEV
__u8 offload_fwd_mark:1;
- __u8 offload_mr_fwd_mark:1;
+ __u8 offload_l3_fwd_mark:1;
#endif
#ifdef CONFIG_NET_CLS_ACT
__u8 tc_skip_classify:1;
@@ -858,6 +867,11 @@ struct sk_buff {
*data;
unsigned int truesize;
refcount_t users;
+
+#ifdef CONFIG_SKB_EXTENSIONS
+ /* only useable after checking ->active_extensions != 0 */
+ struct skb_ext *extensions;
+#endif
};
#ifdef __KERNEL__
@@ -1317,10 +1331,14 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb)
return is_zcopy ? skb_uarg(skb) : NULL;
}
-static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg)
+static inline void skb_zcopy_set(struct sk_buff *skb, struct ubuf_info *uarg,
+ bool *have_ref)
{
if (skb && uarg && !skb_zcopy(skb)) {
- sock_zerocopy_get(uarg);
+ if (unlikely(have_ref && *have_ref))
+ *have_ref = false;
+ else
+ sock_zerocopy_get(uarg);
skb_shinfo(skb)->destructor_arg = uarg;
skb_shinfo(skb)->tx_flags |= SKBTX_ZEROCOPY_FRAG;
}
@@ -1365,7 +1383,7 @@ static inline void skb_zcopy_abort(struct sk_buff *skb)
struct ubuf_info *uarg = skb_zcopy(skb);
if (uarg) {
- sock_zerocopy_put_abort(uarg);
+ sock_zerocopy_put_abort(uarg, false);
skb_shinfo(skb)->tx_flags &= ~SKBTX_ZEROCOPY_FRAG;
}
}
@@ -1741,8 +1759,6 @@ static inline void skb_queue_head_init_class(struct sk_buff_head *list,
* The "__skb_xxxx()" functions are the non-atomic ones that
* can only be called with interrupts disabled.
*/
-void skb_insert(struct sk_buff *old, struct sk_buff *newsk,
- struct sk_buff_head *list);
static inline void __skb_insert(struct sk_buff *newsk,
struct sk_buff *prev, struct sk_buff *next,
struct sk_buff_head *list)
@@ -2524,10 +2540,8 @@ int ___pskb_trim(struct sk_buff *skb, unsigned int len);
static inline void __skb_set_length(struct sk_buff *skb, unsigned int len)
{
- if (unlikely(skb_is_nonlinear(skb))) {
- WARN_ON(1);
+ if (WARN_ON(skb_is_nonlinear(skb)))
return;
- }
skb->len = len;
skb_set_tail_pointer(skb, len);
}
@@ -3345,7 +3359,6 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset,
unsigned int flags);
int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset,
int len);
-int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
@@ -3886,18 +3899,97 @@ static inline void nf_conntrack_get(struct nf_conntrack *nfct)
atomic_inc(&nfct->use);
}
#endif
+
+#ifdef CONFIG_SKB_EXTENSIONS
+enum skb_ext_id {
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge)
+ SKB_EXT_BRIDGE_NF,
+#endif
+#ifdef CONFIG_XFRM
+ SKB_EXT_SEC_PATH,
+#endif
+ SKB_EXT_NUM, /* must be last */
+};
+
+/**
+ * struct skb_ext - sk_buff extensions
+ * @refcnt: 1 on allocation, deallocated on 0
+ * @offset: offset to add to @data to obtain extension address
+ * @chunks: size currently allocated, stored in SKB_EXT_ALIGN_SHIFT units
+ * @data: start of extension data, variable sized
+ *
+ * Note: offsets/lengths are stored in chunks of 8 bytes, this allows
+ * to use 'u8' types while allowing up to 2kb worth of extension data.
+ */
+struct skb_ext {
+ refcount_t refcnt;
+ u8 offset[SKB_EXT_NUM]; /* in chunks of 8 bytes */
+ u8 chunks; /* same */
+ char data[0] __aligned(8);
+};
+
+void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
+void __skb_ext_del(struct sk_buff *skb, enum skb_ext_id id);
+void __skb_ext_put(struct skb_ext *ext);
+
+static inline void skb_ext_put(struct sk_buff *skb)
{
- if (nf_bridge && refcount_dec_and_test(&nf_bridge->use))
- kfree(nf_bridge);
+ if (skb->active_extensions)
+ __skb_ext_put(skb->extensions);
}
-static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge)
+
+static inline void __skb_ext_copy(struct sk_buff *dst,
+ const struct sk_buff *src)
{
- if (nf_bridge)
- refcount_inc(&nf_bridge->use);
+ dst->active_extensions = src->active_extensions;
+
+ if (src->active_extensions) {
+ struct skb_ext *ext = src->extensions;
+
+ refcount_inc(&ext->refcnt);
+ dst->extensions = ext;
+ }
}
-#endif /* CONFIG_BRIDGE_NETFILTER */
+
+static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *src)
+{
+ skb_ext_put(dst);
+ __skb_ext_copy(dst, src);
+}
+
+static inline bool __skb_ext_exist(const struct skb_ext *ext, enum skb_ext_id i)
+{
+ return !!ext->offset[i];
+}
+
+static inline bool skb_ext_exist(const struct sk_buff *skb, enum skb_ext_id id)
+{
+ return skb->active_extensions & (1 << id);
+}
+
+static inline void skb_ext_del(struct sk_buff *skb, enum skb_ext_id id)
+{
+ if (skb_ext_exist(skb, id))
+ __skb_ext_del(skb, id);
+}
+
+static inline void *skb_ext_find(const struct sk_buff *skb, enum skb_ext_id id)
+{
+ if (skb_ext_exist(skb, id)) {
+ struct skb_ext *ext = skb->extensions;
+
+ return (void *)ext + (ext->offset[id] << 3);
+ }
+
+ return NULL;
+}
+#else
+static inline void skb_ext_put(struct sk_buff *skb) {}
+static inline void skb_ext_del(struct sk_buff *skb, int unused) {}
+static inline void __skb_ext_copy(struct sk_buff *d, const struct sk_buff *s) {}
+static inline void skb_ext_copy(struct sk_buff *dst, const struct sk_buff *s) {}
+#endif /* CONFIG_SKB_EXTENSIONS */
+
static inline void nf_reset(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
@@ -3905,8 +3997,7 @@ static inline void nf_reset(struct sk_buff *skb)
skb->_nfct = 0;
#endif
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- nf_bridge_put(skb->nf_bridge);
- skb->nf_bridge = NULL;
+ skb_ext_del(skb, SKB_EXT_BRIDGE_NF);
#endif
}
@@ -3924,7 +4015,7 @@ static inline void ipvs_reset(struct sk_buff *skb)
#endif
}
-/* Note: This doesn't put any conntrack and bridge info in dst. */
+/* Note: This doesn't put any conntrack info in dst. */
static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
bool copy)
{
@@ -3932,10 +4023,6 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src,
dst->_nfct = src->_nfct;
nf_conntrack_get(skb_nfct(src));
#endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- dst->nf_bridge = src->nf_bridge;
- nf_bridge_get(src->nf_bridge);
-#endif
#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) || defined(CONFIG_NF_TABLES)
if (copy)
dst->nf_trace = src->nf_trace;
@@ -3947,9 +4034,6 @@ static inline void nf_copy(struct sk_buff *dst, const struct sk_buff *src)
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
nf_conntrack_put(skb_nfct(dst));
#endif
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
- nf_bridge_put(dst->nf_bridge);
-#endif
__nf_copy(dst, src, true);
}
@@ -3971,12 +4055,19 @@ static inline void skb_init_secmark(struct sk_buff *skb)
{ }
#endif
+static inline int secpath_exists(const struct sk_buff *skb)
+{
+#ifdef CONFIG_XFRM
+ return skb_ext_exist(skb, SKB_EXT_SEC_PATH);
+#else
+ return 0;
+#endif
+}
+
static inline bool skb_irq_freeable(const struct sk_buff *skb)
{
return !skb->destructor &&
-#if IS_ENABLED(CONFIG_XFRM)
- !skb->sp &&
-#endif
+ !secpath_exists(skb) &&
!skb_nfct(skb) &&
!skb->_skb_refdst &&
!skb_has_frag_list(skb);
@@ -4022,10 +4113,10 @@ static inline bool skb_get_dst_pending_confirm(const struct sk_buff *skb)
return skb->dst_pending_confirm != 0;
}
-static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
+static inline struct sec_path *skb_sec_path(const struct sk_buff *skb)
{
#ifdef CONFIG_XFRM
- return skb->sp;
+ return skb_ext_find(skb, SKB_EXT_SEC_PATH);
#else
return NULL;
#endif