From 609320c8a22715b74b39796930c3542719f8ab62 Mon Sep 17 00:00:00 2001 From: Yonghong Song <yhs@fb.com> Date: Thu, 7 Sep 2017 18:36:15 -0700 Subject: perf/bpf: fix a clang compilation issue clang does not support variable length array for structure member. It has the following error during compilation: kernel/trace/trace_syscalls.c:568:17: error: fields must have a constant size: 'variable length array in structure' extension will never be supported unsigned long args[sys_data->nb_args]; ^ The fix is to use a fixed array length instead. Reported-by: Nick Desaulniers <ndesaulniers@google.com> Signed-off-by: Yonghong Song <yhs@fb.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 88951b795ee3..95606a2d556f 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -200,6 +200,8 @@ static inline int is_syscall_trace_event(struct trace_event_call *tp_event) #define SYSCALL_DEFINE5(name, ...) SYSCALL_DEFINEx(5, _##name, __VA_ARGS__) #define SYSCALL_DEFINE6(name, ...) SYSCALL_DEFINEx(6, _##name, __VA_ARGS__) +#define SYSCALL_DEFINE_MAXARGS 6 + #define SYSCALL_DEFINEx(x, sname, ...) \ SYSCALL_METADATA(sname, x, __VA_ARGS__) \ __SYSCALL_DEFINEx(x, sname, __VA_ARGS__) -- cgit v1.2.3-70-g09d2 From 96c5508e3012ed0984ab93821d64ac1ff3279c09 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer <brouer@redhat.com> Date: Sun, 10 Sep 2017 09:47:02 +0200 Subject: xdp: implement xdp_redirect_map for generic XDP Using bpf_redirect_map is allowed for generic XDP programs, but the appropriate map lookup was never performed in xdp_do_generic_redirect(). Instead the map-index is directly used as the ifindex. For the xdp_redirect_map sample in SKB-mode '-S', this resulted in trying sending on ifindex 0 which isn't valid, resulting in getting SKB packets dropped. Thus, the reported performance numbers are wrong in commit 24251c264798 ("samples/bpf: add option for native and skb mode for redirect apps") for the 'xdp_redirect_map -S' case. Before commit 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") it could crash the kernel. Like this commit also check that the map_owner owner is correct before dereferencing the map pointer. But make sure that this API misusage can be caught by a tracepoint. Thus, allowing userspace via tracepoints to detect misbehaving bpf_progs. Fixes: 6103aa96ec07 ("net: implement XDP_REDIRECT for xdp generic") Fixes: 24251c264798 ("samples/bpf: add option for native and skb mode for redirect apps") Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/trace/events/xdp.h | 4 ++-- net/core/filter.c | 38 ++++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 862575ac8da9..4e16c43fba10 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -138,11 +138,11 @@ DEFINE_EVENT_PRINT(xdp_redirect_template, xdp_redirect_map_err, #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, fwd ? fwd->ifindex : 0, \ - 0, map, idx); + 0, map, idx) #define _trace_xdp_redirect_map_err(dev, xdp, fwd, map, idx, err) \ trace_xdp_redirect_map_err(dev, xdp, fwd ? fwd->ifindex : 0, \ - err, map, idx); + err, map, idx) #endif /* _TRACE_XDP_H */ diff --git a/net/core/filter.c b/net/core/filter.c index 3a50a9b021e2..24dd33dd9f04 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2506,21 +2506,19 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, struct redirect_info *ri = this_cpu_ptr(&redirect_info); const struct bpf_prog *map_owner = ri->map_owner; struct bpf_map *map = ri->map; + struct net_device *fwd = NULL; u32 index = ri->ifindex; - struct net_device *fwd; int err; ri->ifindex = 0; ri->map = NULL; ri->map_owner = NULL; - /* This is really only caused by a deliberately crappy - * BPF program, normally we would never hit that case, - * so no need to inform someone via tracepoints either, - * just bail out. - */ - if (unlikely(map_owner != xdp_prog)) - return -EINVAL; + if (unlikely(map_owner != xdp_prog)) { + err = -EFAULT; + map = NULL; + goto err; + } fwd = __dev_map_lookup_elem(map, index); if (!fwd) { @@ -2576,13 +2574,27 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct bpf_prog *xdp_prog) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); + const struct bpf_prog *map_owner = ri->map_owner; + struct bpf_map *map = ri->map; + struct net_device *fwd = NULL; u32 index = ri->ifindex; - struct net_device *fwd; unsigned int len; int err = 0; - fwd = dev_get_by_index_rcu(dev_net(dev), index); ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = NULL; + + if (map) { + if (unlikely(map_owner != xdp_prog)) { + err = -EFAULT; + map = NULL; + goto err; + } + fwd = __dev_map_lookup_elem(map, index); + } else { + fwd = dev_get_by_index_rcu(dev_net(dev), index); + } if (unlikely(!fwd)) { err = -EINVAL; goto err; @@ -2600,10 +2612,12 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, } skb->dev = fwd; - _trace_xdp_redirect(dev, xdp_prog, index); + map ? _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index) + : _trace_xdp_redirect(dev, xdp_prog, index); return 0; err: - _trace_xdp_redirect_err(dev, xdp_prog, index, err); + map ? _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err) + : _trace_xdp_redirect_err(dev, xdp_prog, index, err); return err; } EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); -- cgit v1.2.3-70-g09d2 From d7fb60b9cafb982cb2e46a267646a8dfd4f2e5da Mon Sep 17 00:00:00 2001 From: Cong Wang <xiyou.wangcong@gmail.com> Date: Mon, 11 Sep 2017 16:33:30 -0700 Subject: net_sched: get rid of tcfa_rcu gen estimator has been rewritten in commit 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators"), the caller is no longer needed to wait for a grace period. So this patch gets rid of it. This also completely closes a race condition between action free path and filter chain add/remove path for the following patch. Because otherwise the nested RCU callback can't be caught by rcu_barrier(). Please see also the comments in code. Cc: Jiri Pirko <jiri@mellanox.com> Cc: Jamal Hadi Salim <jhs@mojatatu.com> Cc: Eric Dumazet <edumazet@google.com> Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/act_api.h | 2 -- net/sched/act_api.c | 17 ++++++++--------- 2 files changed, 8 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 8f3d5d8b5ae0..b944e0eb93be 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -34,7 +34,6 @@ struct tc_action { struct gnet_stats_queue tcfa_qstats; struct net_rate_estimator __rcu *tcfa_rate_est; spinlock_t tcfa_lock; - struct rcu_head tcfa_rcu; struct gnet_stats_basic_cpu __percpu *cpu_bstats; struct gnet_stats_queue __percpu *cpu_qstats; struct tc_cookie *act_cookie; @@ -50,7 +49,6 @@ struct tc_action { #define tcf_qstats common.tcfa_qstats #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock -#define tcf_rcu common.tcfa_rcu /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. diff --git a/net/sched/act_api.c b/net/sched/act_api.c index a306974e2fb4..fcd7dc7b807a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -53,10 +53,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, res->goto_tp = rcu_dereference_bh(chain->filter_chain); } -static void free_tcf(struct rcu_head *head) +/* XXX: For standalone actions, we don't need a RCU grace period either, because + * actions are always connected to filters and filters are already destroyed in + * RCU callbacks, so after a RCU grace period actions are already disconnected + * from filters. Readers later can not find us. + */ +static void free_tcf(struct tc_action *p) { - struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); - free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); @@ -76,11 +79,7 @@ static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); - /* - * gen_estimator est_timer() might access p->tcfa_lock - * or bstats, wait a RCU grace period before freeing p - */ - call_rcu(&p->tcfa_rcu, free_tcf); + free_tcf(p); } int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) @@ -259,7 +258,7 @@ void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); - call_rcu(&a->tcfa_rcu, free_tcf); + free_tcf(a); } EXPORT_SYMBOL(tcf_idr_cleanup); -- cgit v1.2.3-70-g09d2 From fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 Mon Sep 17 00:00:00 2001 From: Dan Carpenter <dan.carpenter@oracle.com> Date: Thu, 14 Sep 2017 02:00:54 +0300 Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled() This code causes a static checker warning because Smatch doesn't trust anything that comes from skb->data. I've reviewed this code and I do think skb->data can be controlled by the user here. The sctp_event_subscribe struct has 13 __u8 fields and we want to see if ours is non-zero. sn_type can be any value in the 0-USHRT_MAX range. We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read either before the start of the struct or after the end. This is a very old bug and it's surprising that it would go undetected for so long but my theory is that it just doesn't have a big impact so it would be hard to notice. Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/sctp/ulpevent.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 1060494ac230..b8c86ec1a8f5 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -153,8 +153,12 @@ __u16 sctp_ulpevent_get_notification_type(const struct sctp_ulpevent *event); static inline int sctp_ulpevent_type_enabled(__u16 sn_type, struct sctp_event_subscribe *mask) { + int offset = sn_type - SCTP_SN_TYPE_BASE; char *amask = (char *) mask; - return amask[sn_type - SCTP_SN_TYPE_BASE]; + + if (offset >= sizeof(struct sctp_event_subscribe)) + return 0; + return amask[offset]; } /* Given an event subscription, is this event enabled? */ -- cgit v1.2.3-70-g09d2 From d25adbeb0cdb860fb39e09cdd025e9cfc954c5ab Mon Sep 17 00:00:00 2001 From: Xin Long <lucien.xin@gmail.com> Date: Fri, 15 Sep 2017 11:02:21 +0800 Subject: sctp: fix an use-after-free issue in sctp_sock_dump Commit 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") tried to fix an use-after-free issue by checking !sctp_sk(sk)->ep with holding sock and sock lock. But Paolo noticed that endpoint could be destroyed in sctp_rcv without sock lock protection. It means the use-after-free issue still could be triggered when sctp_rcv put and destroy ep after sctp_sock_dump checks !ep, although it's pretty hard to reproduce. I could reproduce it by mdelay in sctp_rcv while msleep in sctp_close and sctp_sock_dump long time. This patch is to add another param cb_done to sctp_for_each_transport and dump ep->assocs with holding tsp after jumping out of transport's traversal in it to avoid this issue. It can also improve sctp diag dump to make it run faster, as no need to save sk into cb->args[5] and keep calling sctp_for_each_transport any more. This patch is also to use int * instead of int for the pos argument in sctp_for_each_transport, which could make postion increment only in sctp_for_each_transport and no need to keep changing cb->args[2] in sctp_sock_filter and sctp_sock_dump any more. Fixes: 86fdb3448cc1 ("sctp: ensure ep is not destroyed before doing the dump") Reported-by: Paolo Abeni <pabeni@redhat.com> Signed-off-by: Xin Long <lucien.xin@gmail.com> Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> Acked-by: Neil Horman <nhorman@tuxdriver.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/sctp/sctp.h | 3 ++- net/sctp/sctp_diag.c | 32 +++++++++----------------------- net/sctp/socket.c | 40 +++++++++++++++++++++++++--------------- 3 files changed, 36 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 06b4f515e157..d7d8cba01469 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -127,7 +127,8 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), const union sctp_addr *laddr, const union sctp_addr *paddr, void *p); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p); + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p); int sctp_for_each_endpoint(int (*cb)(struct sctp_endpoint *, void *), void *p); int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, struct sctp_info *info); diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index e99518e79b52..7008a992749b 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -279,9 +279,11 @@ out: return err; } -static int sctp_sock_dump(struct sock *sk, void *p) +static int sctp_sock_dump(struct sctp_transport *tsp, void *p) { + struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; @@ -289,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p) int err = 0; lock_sock(sk); - if (!sctp_sk(sk)->ep) - goto release; - list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) { + list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -327,40 +327,30 @@ next: cb->args[4]++; } cb->args[1] = 0; - cb->args[2]++; cb->args[3] = 0; cb->args[4] = 0; release: release_sock(sk); - sock_put(sk); return err; } -static int sctp_get_sock(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_transport *tsp, void *p) { struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; - struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; struct sctp_association *assoc = list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ if (tsp->asoc != assoc) - goto out; + return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - - sock_hold(sk); - cb->args[5] = (long)sk; + return 0; return 1; - -out: - cb->args[2]++; - return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -503,12 +493,8 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; -next: - cb->args[5] = 0; - sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); - - if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) - goto next; + sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, + net, (int *)&cb->args[2], &commp); done: cb->args[1] = cb->args[4]; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1b00a1e09b93..d4730ada7f32 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4658,29 +4658,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p) { + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p) { struct rhashtable_iter hti; - void *obj; - int err; - - err = sctp_transport_walk_start(&hti); - if (err) - return err; + struct sctp_transport *tsp; + int ret; - obj = sctp_transport_get_idx(net, &hti, pos + 1); - for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) { - struct sctp_transport *transport = obj; +again: + ret = sctp_transport_walk_start(&hti); + if (ret) + return ret; - if (!sctp_transport_hold(transport)) + tsp = sctp_transport_get_idx(net, &hti, *pos + 1); + for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { + if (!sctp_transport_hold(tsp)) continue; - err = cb(transport, p); - sctp_transport_put(transport); - if (err) + ret = cb(tsp, p); + if (ret) break; + (*pos)++; + sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); - return err; + if (ret) { + if (cb_done && !cb_done(tsp, p)) { + (*pos)++; + sctp_transport_put(tsp); + goto again; + } + sctp_transport_put(tsp); + } + + return ret; } EXPORT_SYMBOL_GPL(sctp_for_each_transport); -- cgit v1.2.3-70-g09d2