From fc651001d2c5ca4f8b87efae2edb69fca94a6365 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 22 May 2019 12:22:21 -0700 Subject: neighbor: Add tracepoint to __neigh_create Add tracepoint to __neigh_create to enable debugging of new entries. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/trace/events/neigh.h | 49 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/neigh.h b/include/trace/events/neigh.h index 0bdb08557763..62bb17516713 100644 --- a/include/trace/events/neigh.h +++ b/include/trace/events/neigh.h @@ -20,6 +20,55 @@ { NUD_NOARP, "noarp" }, \ { NUD_PERMANENT, "permanent"}) +TRACE_EVENT(neigh_create, + + TP_PROTO(struct neigh_table *tbl, struct net_device *dev, + const void *pkey, const struct neighbour *n, + bool exempt_from_gc), + + TP_ARGS(tbl, dev, pkey, n, exempt_from_gc), + + TP_STRUCT__entry( + __field(u32, family) + __dynamic_array(char, dev, IFNAMSIZ ) + __field(int, entries) + __field(u8, created) + __field(u8, gc_exempt) + __array(u8, primary_key4, 4) + __array(u8, primary_key6, 16) + ), + + TP_fast_assign( + struct in6_addr *pin6; + __be32 *p32; + + __entry->family = tbl->family; + __assign_str(dev, (dev ? dev->name : "NULL")); + __entry->entries = atomic_read(&tbl->gc_entries); + __entry->created = n != NULL; + __entry->gc_exempt = exempt_from_gc; + pin6 = (struct in6_addr *)__entry->primary_key6; + p32 = (__be32 *)__entry->primary_key4; + + if (tbl->family == AF_INET) + *p32 = *(__be32 *)pkey; + else + *p32 = 0; + +#if IS_ENABLED(CONFIG_IPV6) + if (tbl->family == AF_INET6) { + pin6 = (struct in6_addr *)__entry->primary_key6; + *pin6 = *(struct in6_addr *)pkey; + } +#endif + ), + + TP_printk("family %d dev %s entries %d primary_key4 %pI4 primary_key6 %pI6c created %d gc_exempt %d", + __entry->family, __get_str(dev), __entry->entries, + __entry->primary_key4, __entry->primary_key6, + __entry->created, __entry->gc_exempt) +); + TRACE_EVENT(neigh_update, TP_PROTO(struct neighbour *n, const u8 *lladdr, u8 new, -- cgit v1.2.3-70-g09d2 From f033b688c1ede5ec78c9a718fa9f0b374049bc31 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:05:58 +0200 Subject: xdp: add tracepoints for XDP mem These tracepoints make it easier to troubleshoot XDP mem id disconnect. The xdp:mem_disconnect tracepoint cannot be replaced via kprobe. It is placed at the last stable place for the pointer to struct xdp_mem_allocator, just before it's scheduled for RCU removal. It also extract info on 'safe_to_remove' and 'force'. Detailed info about in-flight pages is not available at this layer. The next patch will added tracepoints needed at the page_pool layer for this. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/net/xdp_priv.h | 23 +++++++++ include/trace/events/xdp.h | 115 +++++++++++++++++++++++++++++++++++++++++++++ net/core/xdp.c | 21 ++------- 3 files changed, 143 insertions(+), 16 deletions(-) create mode 100644 include/net/xdp_priv.h (limited to 'include/trace') diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h new file mode 100644 index 000000000000..6a8cba6ea79a --- /dev/null +++ b/include/net/xdp_priv.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NET_XDP_PRIV_H__ +#define __LINUX_NET_XDP_PRIV_H__ + +#include + +/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ +struct xdp_mem_allocator { + struct xdp_mem_info mem; + union { + void *allocator; + struct page_pool *page_pool; + struct zero_copy_allocator *zc_alloc; + }; + int disconnect_cnt; + unsigned long defer_start; + struct rhash_head node; + struct rcu_head rcu; + struct delayed_work defer_wq; + unsigned long defer_warn; +}; + +#endif /* __LINUX_NET_XDP_PRIV_H__ */ diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index e95cb86b65cf..bb5e380e2ef3 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -269,6 +269,121 @@ TRACE_EVENT(xdp_devmap_xmit, __entry->from_ifindex, __entry->to_ifindex, __entry->err) ); +/* Expect users already include , but not xdp_priv.h */ +#include + +#define __MEM_TYPE_MAP(FN) \ + FN(PAGE_SHARED) \ + FN(PAGE_ORDER0) \ + FN(PAGE_POOL) \ + FN(ZERO_COPY) + +#define __MEM_TYPE_TP_FN(x) \ + TRACE_DEFINE_ENUM(MEM_TYPE_##x); +#define __MEM_TYPE_SYM_FN(x) \ + { MEM_TYPE_##x, #x }, +#define __MEM_TYPE_SYM_TAB \ + __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } +__MEM_TYPE_MAP(__MEM_TYPE_TP_FN) + +TRACE_EVENT(mem_disconnect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + bool safe_to_remove, bool force), + + TP_ARGS(xa, safe_to_remove, force), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(bool, safe_to_remove) + __field(bool, force) + __field(int, disconnect_cnt) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->safe_to_remove = safe_to_remove; + __entry->force = force; + __entry->disconnect_cnt = xa->disconnect_cnt; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " safe_to_remove=%s force=%s disconnect_cnt=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->safe_to_remove ? "true" : "false", + __entry->force ? "true" : "false", + __entry->disconnect_cnt + ) +); + +TRACE_EVENT(mem_connect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + const struct xdp_rxq_info *rxq), + + TP_ARGS(xa, rxq), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(const struct xdp_rxq_info *, rxq) + __field(int, ifindex) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->rxq = rxq; + __entry->ifindex = rxq->dev->ifindex; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " ifindex=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->ifindex + ) +); + +TRACE_EVENT(mem_return_failed, + + TP_PROTO(const struct xdp_mem_info *mem, + const struct page *page), + + TP_ARGS(mem, page), + + TP_STRUCT__entry( + __field(const struct page *, page) + __field(u32, mem_id) + __field(u32, mem_type) + ), + + TP_fast_assign( + __entry->page = page; + __entry->mem_id = mem->id; + __entry->mem_type = mem->type; + ), + + TP_printk("mem_id=%d mem_type=%s page=%p", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->page + ) +); + #endif /* _TRACE_XDP_H */ #include diff --git a/net/core/xdp.c b/net/core/xdp.c index 622c81dc7ba8..b29d7b513a18 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -14,6 +14,8 @@ #include #include +#include /* struct xdp_mem_allocator */ +#include #define REG_STATE_NEW 0x0 #define REG_STATE_REGISTERED 0x1 @@ -29,21 +31,6 @@ static int mem_id_next = MEM_ID_MIN; static bool mem_id_init; /* false */ static struct rhashtable *mem_id_ht; -struct xdp_mem_allocator { - struct xdp_mem_info mem; - union { - void *allocator; - struct page_pool *page_pool; - struct zero_copy_allocator *zc_alloc; - }; - struct rhash_head node; - struct rcu_head rcu; - struct delayed_work defer_wq; - unsigned long defer_start; - unsigned long defer_warn; - int disconnect_cnt; -}; - static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) { const u32 *k = data; @@ -117,7 +104,7 @@ bool __mem_id_disconnect(int id, bool force) if (xa->mem.type == MEM_TYPE_PAGE_POOL) safe_to_remove = page_pool_request_shutdown(xa->page_pool); - /* TODO: Tracepoint will be added here in next-patch */ + trace_mem_disconnect(xa, safe_to_remove, force); if ((safe_to_remove || force) && !rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params)) @@ -385,6 +372,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, mutex_unlock(&mem_id_lock); + trace_mem_connect(xdp_alloc, xdp_rxq); return 0; err: mutex_unlock(&mem_id_lock); @@ -417,6 +405,7 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, } else { /* Hopefully stack show who to blame for late return */ WARN_ONCE(1, "page_pool gone mem.id=%d", mem->id); + trace_mem_return_failed(mem, page); put_page(page); } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 32c28f7e413981c7dd4a3ad9bbb1151e4b654261 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 18 Jun 2019 15:06:03 +0200 Subject: page_pool: add tracepoints for page_pool with details need by XDP The xdp tracepoints for mem id disconnect don't carry information about, why it was not safe_to_remove. The tracepoint page_pool:page_pool_inflight in this patch can be used for extract this info for further debugging. This patchset also adds tracepoint for the pages_state_* release/hold transitions, including a pointer to the page. This can be used for stats about in-flight pages, or used to debug page leakage via keeping track of page pointer and combining this with kprobe for __put_page(). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/trace/events/page_pool.h | 87 ++++++++++++++++++++++++++++++++++++++++ net/core/net-traces.c | 4 ++ net/core/page_pool.c | 9 ++++- 3 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 include/trace/events/page_pool.h (limited to 'include/trace') diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h new file mode 100644 index 000000000000..47b5ee880aa9 --- /dev/null +++ b/include/trace/events/page_pool.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_pool + +#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_POOL_H + +#include +#include + +#include + +TRACE_EVENT(page_pool_inflight, + + TP_PROTO(const struct page_pool *pool, + s32 inflight, u32 hold, u32 release), + + TP_ARGS(pool, inflight, hold, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(s32, inflight) + __field(u32, hold) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->inflight = inflight; + __entry->hold = hold; + __entry->release = release; + ), + + TP_printk("page_pool=%p inflight=%d hold=%u release=%u", + __entry->pool, __entry->inflight, __entry->hold, __entry->release) +); + +TRACE_EVENT(page_pool_state_release, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 release), + + TP_ARGS(pool, page, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->page = page; + __entry->release = release; + ), + + TP_printk("page_pool=%p page=%p release=%u", + __entry->pool, __entry->page, __entry->release) +); + +TRACE_EVENT(page_pool_state_hold, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 hold), + + TP_ARGS(pool, page, hold), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, hold) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->page = page; + __entry->hold = hold; + ), + + TP_printk("page_pool=%p page=%p hold=%u", + __entry->pool, __entry->page, __entry->hold) +); + +#endif /* _TRACE_PAGE_POOL_H */ + +/* This part must be outside protection */ +#include diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 470b179d599e..283ddb2dbc7d 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -43,6 +43,10 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); #endif +#if IS_ENABLED(CONFIG_PAGE_POOL) +#include +#endif + #include EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update); EXPORT_TRACEPOINT_SYMBOL_GPL(neigh_update_done); diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 42c3b0a5a259..f55ab055d543 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -4,6 +4,7 @@ * Author: Jesper Dangaard Brouer * Copyright (C) 2016 Red Hat, Inc. */ + #include #include #include @@ -14,6 +15,8 @@ #include #include /* for __put_page() */ +#include + static int page_pool_init(struct page_pool *pool, const struct page_pool_params *params) { @@ -156,6 +159,8 @@ skip_dma_map: /* Track how many pages are held 'in-flight' */ pool->pages_state_hold_cnt++; + trace_page_pool_state_hold(pool, page, pool->pages_state_hold_cnt); + /* When page just alloc'ed is should/must have refcnt 1. */ return page; } @@ -191,7 +196,7 @@ static s32 page_pool_inflight(struct page_pool *pool) distance = _distance(hold_cnt, release_cnt); - /* TODO: Add tracepoint here */ + trace_page_pool_inflight(pool, distance, hold_cnt, release_cnt); return distance; } @@ -222,6 +227,8 @@ static void __page_pool_clean_page(struct page_pool *pool, page->dma_addr = 0; skip_dma_unmap: atomic_inc(&pool->pages_state_release_cnt); + trace_page_pool_state_release(pool, page, + atomic_read(&pool->pages_state_release_cnt)); } /* unmap the page and clean our state */ -- cgit v1.2.3-70-g09d2 From e7d4798960b3ebcd243ae6a59e04d4fe6518c96c Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Thu, 13 Jun 2019 18:39:58 +0900 Subject: xdp: Add tracepoint for bulk XDP_TX This is introduced for admins to check what is happening on XDP_TX when bulk XDP_TX is in use, which will be first introduced in veth in next commit. v3: - Add act field to be in line with other XDP tracepoints. Signed-off-by: Toshiaki Makita Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- include/trace/events/xdp.h | 29 +++++++++++++++++++++++++++++ kernel/bpf/core.c | 1 + 2 files changed, 30 insertions(+) (limited to 'include/trace') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index bb5e380e2ef3..81e708c4b513 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -50,6 +50,35 @@ TRACE_EVENT(xdp_exception, __entry->ifindex) ); +TRACE_EVENT(xdp_bulk_tx, + + TP_PROTO(const struct net_device *dev, + int sent, int drops, int err), + + TP_ARGS(dev, sent, drops, err), + + TP_STRUCT__entry( + __field(int, ifindex) + __field(u32, act) + __field(int, drops) + __field(int, sent) + __field(int, err) + ), + + TP_fast_assign( + __entry->ifindex = dev->ifindex; + __entry->act = XDP_TX; + __entry->drops = drops; + __entry->sent = sent; + __entry->err = err; + ), + + TP_printk("ifindex=%d action=%s sent=%d drops=%d err=%d", + __entry->ifindex, + __print_symbolic(__entry->act, __XDP_ACT_SYM_TAB), + __entry->sent, __entry->drops, __entry->err) +); + DECLARE_EVENT_CLASS(xdp_redirect_template, TP_PROTO(const struct net_device *dev, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index ad3be85f1411..561ed07d3007 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2101,3 +2101,4 @@ EXPORT_SYMBOL(bpf_stats_enabled_key); #include EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_exception); +EXPORT_TRACEPOINT_SYMBOL_GPL(xdp_bulk_tx); -- cgit v1.2.3-70-g09d2 From 43e74c0267a35d6f5127218054b2d80c7fe801f5 Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Fri, 28 Jun 2019 11:12:34 +0200 Subject: bpf_xdp_redirect_map: Perform map lookup in eBPF helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The bpf_redirect_map() helper used by XDP programs doesn't return any indication of whether it can successfully redirect to the map index it was given. Instead, BPF programs have to track this themselves, leading to programs using duplicate maps to track which entries are populated in the devmap. This patch fixes this by moving the map lookup into the bpf_redirect_map() helper, which makes it possible to return failure to the eBPF program. The lower bits of the flags argument is used as the return code, which means that existing users who pass a '0' flag argument will get XDP_ABORTED. With this, a BPF program can check the return code from the helper call and react by, for instance, substituting a different redirect. This works for any type of map used for redirect. Signed-off-by: Toke Høiland-Jørgensen Acked-by: Jonathan Lemon Acked-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann --- include/linux/filter.h | 1 + include/trace/events/xdp.h | 5 ++--- include/uapi/linux/bpf.h | 7 +++++-- net/core/filter.c | 32 ++++++++++++++++++-------------- 4 files changed, 26 insertions(+), 19 deletions(-) (limited to 'include/trace') diff --git a/include/linux/filter.h b/include/linux/filter.h index 92bd192f7786..1fe53e78c7e3 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -580,6 +580,7 @@ struct bpf_skb_data_end { struct bpf_redirect_info { u32 flags; u32 tgt_index; + void *tgt_value; struct bpf_map *map; struct bpf_map *map_to_flush; u32 kern_flags; diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 81e708c4b513..68899fdc985b 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -175,9 +175,8 @@ struct _bpf_dtab_netdev { #endif /* __DEVMAP_OBJ_TYPE */ #define devmap_ifindex(fwd, map) \ - (!fwd ? 0 : \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)) + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index a396b516a2b2..cffea1826a1f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1571,8 +1571,11 @@ union bpf_attr { * but this is only implemented for native XDP (with driver * support) as of this writing). * - * All values for *flags* are reserved for future usage, and must - * be left at zero. + * The lower two bits of *flags* are used as the return code if + * the map lookup fails. This is so that the return value can be + * one of the XDP program return codes up to XDP_TX, as chosen by + * the caller. Any higher bits in the *flags* argument must be + * unset. * * When used to redirect packets to net devices, this helper * provides a high performance increase over **bpf_redirect**\ (). diff --git a/net/core/filter.c b/net/core/filter.c index b4a062379bb9..4836264f82ee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3605,17 +3605,13 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct bpf_redirect_info *ri) { u32 index = ri->tgt_index; - void *fwd = NULL; + void *fwd = ri->tgt_value; int err; ri->tgt_index = 0; + ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - fwd = __xdp_map_lookup_elem(map, index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) xdp_do_flush_map(); @@ -3652,18 +3648,13 @@ static int xdp_do_generic_redirect_map(struct net_device *dev, { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); u32 index = ri->tgt_index; - void *fwd = NULL; + void *fwd = ri->tgt_value; int err = 0; ri->tgt_index = 0; + ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - fwd = __xdp_map_lookup_elem(map, index); - if (unlikely(!fwd)) { - err = -EINVAL; - goto err; - } - if (map->map_type == BPF_MAP_TYPE_DEVMAP) { struct bpf_dtab_netdev *dst = fwd; @@ -3732,6 +3723,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->flags = flags; ri->tgt_index = ifindex; + ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); return XDP_REDIRECT; @@ -3750,9 +3742,21 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + /* Lower bits of the flags are used as return code on lookup failure */ + if (unlikely(flags > XDP_TX)) return XDP_ABORTED; + ri->tgt_value = __xdp_map_lookup_elem(map, ifindex); + if (unlikely(!ri->tgt_value)) { + /* If the lookup fails we want to clear out the state in the + * redirect_info struct completely, so that if an eBPF program + * performs multiple lookups, the last one always takes + * precedence. + */ + WRITE_ONCE(ri->map, NULL); + return flags; + } + ri->flags = flags; ri->tgt_index = ifindex; WRITE_ONCE(ri->map, map); -- cgit v1.2.3-70-g09d2 From 99f0eae653b2db64917d0b58099eb51e300b311d Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jul 2019 16:04:19 +0100 Subject: rxrpc: Fix oops in tracepoint If the rxrpc_eproto tracepoint is enabled, an oops will be cause by the trace line that rxrpc_extract_header() tries to emit when a protocol error occurs (typically because the packet is short) because the call argument is NULL. Fix this by using ?: to assume 0 as the debug_id if call is NULL. This can then be induced by: echo -e '\0\0\0\0\0\0\0\0' | ncat -4u --send-only 20001 where addr has the following program running on it: #include #include #include #include #include #include #include int main(void) { struct sockaddr_rxrpc srx; int fd; memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; srx.srx_service = 0; srx.transport_type = AF_INET; srx.transport_len = sizeof(srx.transport.sin); srx.transport.sin.sin_family = AF_INET; srx.transport.sin.sin_port = htons(0x4e21); fd = socket(AF_RXRPC, SOCK_DGRAM, AF_INET6); bind(fd, (struct sockaddr *)&srx, sizeof(srx)); sleep(20); return 0; } It results in the following oops. BUG: kernel NULL pointer dereference, address: 0000000000000340 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page ... RIP: 0010:trace_event_raw_event_rxrpc_rx_eproto+0x47/0xac ... Call Trace: rxrpc_extract_header+0x86/0x171 ? rcu_read_lock_sched_held+0x5d/0x63 ? rxrpc_new_skb+0xd4/0x109 rxrpc_input_packet+0xef/0x14fc ? rxrpc_input_data+0x986/0x986 udp_queue_rcv_one_skb+0xbf/0x3d0 udp_unicast_rcv_skb.isra.8+0x64/0x71 ip_protocol_deliver_rcu+0xe4/0x1b4 ip_local_deliver+0xf0/0x154 __netif_receive_skb_one_core+0x50/0x6c netif_receive_skb_internal+0x26b/0x2e9 napi_gro_receive+0xf8/0x1da rtl8169_poll+0x303/0x4c4 net_rx_action+0x10e/0x333 __do_softirq+0x1a5/0x38f irq_exit+0x54/0xc4 do_IRQ+0xda/0xf8 common_interrupt+0xf/0xf ... ? cpuidle_enter_state+0x23c/0x34d cpuidle_enter+0x2a/0x36 do_idle+0x163/0x1ea cpu_startup_entry+0x1d/0x1f start_secondary+0x157/0x172 secondary_startup_64+0xa4/0xb0 Fixes: a25e21f0bcd2 ("rxrpc, afs: Use debug_ids rather than pointers in traces") Signed-off-by: David Howells Reviewed-by: Marc Dionne Signed-off-by: David S. Miller --- include/trace/events/rxrpc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/trace') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d85816878a52..cc1d060cbf13 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1379,7 +1379,7 @@ TRACE_EVENT(rxrpc_rx_eproto, ), TP_fast_assign( - __entry->call = call->debug_id; + __entry->call = call ? call->debug_id : 0; __entry->serial = serial; __entry->why = why; ), -- cgit v1.2.3-70-g09d2