From 14d32b2525dd3a21c29b0d9edf1748f5dabc1bbc Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 31 Jul 2018 18:03:32 +0200 Subject: jiffies: add utility function to calculate delta in ms add jiffies_delta_to_msecs() helper func to calculate the delta between two times and eventually 0 if negative. Suggested-by: Eric Dumazet Signed-off-by: Matteo Croce Reviewed-by: Eric Dumazet Acked-by: Simon Horman Signed-off-by: Pablo Neira Ayuso --- include/linux/jiffies.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index a27cf6652327..fa928242567d 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -447,6 +447,11 @@ static inline clock_t jiffies_delta_to_clock_t(long delta) return jiffies_to_clock_t(max(0L, delta)); } +static inline unsigned int jiffies_delta_to_msecs(long delta) +{ + return jiffies_to_msecs(max(0L, delta)); +} + extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); -- cgit v1.2.3-70-g09d2 From 4ef360dd6a65f6ef337645e1b65e744034754b19 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 26 Jul 2018 00:39:51 +0900 Subject: netfilter: nft_set: fix allocation size overflow in privsize callback. In order to determine allocation size of set, ->privsize is invoked. At this point, both desc->size and size of each data structure of set are used. desc->size means number of element that is given by user. desc->size is u32 type. so that upperlimit of set element is 4294967295. but return type of ->privsize is also u32. hence overflow can occurred. test commands: %nft add table ip filter %nft add set ip filter hash1 { type ipv4_addr \; size 4294967295 \; } %nft list ruleset splat looks like: [ 1239.202910] kasan: CONFIG_KASAN_INLINE enabled [ 1239.208788] kasan: GPF could be caused by NULL-ptr deref or user memory access [ 1239.217625] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC KASAN PTI [ 1239.219329] CPU: 0 PID: 1603 Comm: nft Not tainted 4.18.0-rc5+ #7 [ 1239.229091] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set] [ 1239.229091] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16 [ 1239.229091] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246 [ 1239.229091] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001 [ 1239.229091] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410 [ 1239.229091] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030 [ 1239.229091] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0 [ 1239.229091] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000 [ 1239.229091] FS: 00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 1239.229091] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1239.229091] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0 [ 1239.229091] Call Trace: [ 1239.229091] ? nft_hash_remove+0xf0/0xf0 [nf_tables_set] [ 1239.229091] ? memset+0x1f/0x40 [ 1239.229091] ? __nla_reserve+0x9f/0xb0 [ 1239.229091] ? memcpy+0x34/0x50 [ 1239.229091] nf_tables_dump_set+0x9a1/0xda0 [nf_tables] [ 1239.229091] ? __kmalloc_reserve.isra.29+0x2e/0xa0 [ 1239.229091] ? nft_chain_hash_obj+0x630/0x630 [nf_tables] [ 1239.229091] ? nf_tables_commit+0x2c60/0x2c60 [nf_tables] [ 1239.229091] netlink_dump+0x470/0xa20 [ 1239.229091] __netlink_dump_start+0x5ae/0x690 [ 1239.229091] nft_netlink_dump_start_rcu+0xd1/0x160 [nf_tables] [ 1239.229091] nf_tables_getsetelem+0x2e5/0x4b0 [nf_tables] [ 1239.229091] ? nft_get_set_elem+0x440/0x440 [nf_tables] [ 1239.229091] ? nft_chain_hash_obj+0x630/0x630 [nf_tables] [ 1239.229091] ? nf_tables_dump_obj_done+0x70/0x70 [nf_tables] [ 1239.229091] ? nla_parse+0xab/0x230 [ 1239.229091] ? nft_get_set_elem+0x440/0x440 [nf_tables] [ 1239.229091] nfnetlink_rcv_msg+0x7f0/0xab0 [nfnetlink] [ 1239.229091] ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink] [ 1239.229091] ? debug_show_all_locks+0x290/0x290 [ 1239.229091] ? sched_clock_cpu+0x132/0x170 [ 1239.229091] ? find_held_lock+0x39/0x1b0 [ 1239.229091] ? sched_clock_local+0x10d/0x130 [ 1239.229091] netlink_rcv_skb+0x211/0x320 [ 1239.229091] ? nfnetlink_bind+0x1d0/0x1d0 [nfnetlink] [ 1239.229091] ? netlink_ack+0x7b0/0x7b0 [ 1239.229091] ? ns_capable_common+0x6e/0x110 [ 1239.229091] nfnetlink_rcv+0x2d1/0x310 [nfnetlink] [ 1239.229091] ? nfnetlink_rcv_batch+0x10f0/0x10f0 [nfnetlink] [ 1239.229091] ? netlink_deliver_tap+0x829/0x930 [ 1239.229091] ? lock_acquire+0x265/0x2e0 [ 1239.229091] netlink_unicast+0x406/0x520 [ 1239.509725] ? netlink_attachskb+0x5b0/0x5b0 [ 1239.509725] ? find_held_lock+0x39/0x1b0 [ 1239.509725] netlink_sendmsg+0x987/0xa20 [ 1239.509725] ? netlink_unicast+0x520/0x520 [ 1239.509725] ? _copy_from_user+0xa9/0xc0 [ 1239.509725] __sys_sendto+0x21a/0x2c0 [ 1239.509725] ? __ia32_sys_getpeername+0xa0/0xa0 [ 1239.509725] ? retint_kernel+0x10/0x10 [ 1239.509725] ? sched_clock_cpu+0x132/0x170 [ 1239.509725] ? find_held_lock+0x39/0x1b0 [ 1239.509725] ? lock_downgrade+0x540/0x540 [ 1239.509725] ? up_read+0x1c/0x100 [ 1239.509725] ? __do_page_fault+0x763/0x970 [ 1239.509725] ? retint_user+0x18/0x18 [ 1239.509725] __x64_sys_sendto+0x177/0x180 [ 1239.509725] do_syscall_64+0xaa/0x360 [ 1239.509725] entry_SYSCALL_64_after_hwframe+0x49/0xbe [ 1239.509725] RIP: 0033:0x7f5a8f468e03 [ 1239.509725] Code: 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb d0 0f 1f 84 00 00 00 00 00 83 3d 49 c9 2b 00 00 75 13 49 89 ca b8 2c 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 34 c3 48 83 ec 08 e8 [ 1239.509725] RSP: 002b:00007ffd78d0b778 EFLAGS: 00000246 ORIG_RAX: 000000000000002c [ 1239.509725] RAX: ffffffffffffffda RBX: 00007ffd78d0c890 RCX: 00007f5a8f468e03 [ 1239.509725] RDX: 0000000000000034 RSI: 00007ffd78d0b7e0 RDI: 0000000000000003 [ 1239.509725] RBP: 00007ffd78d0b7d0 R08: 00007f5a8f15c160 R09: 000000000000000c [ 1239.509725] R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffd78d0b7e0 [ 1239.509725] R13: 0000000000000034 R14: 00007f5a8f9aff60 R15: 00005648040094b0 [ 1239.509725] Modules linked in: nf_tables_set nf_tables nfnetlink ip_tables x_tables [ 1239.670713] ---[ end trace 39375adcda140f11 ]--- [ 1239.676016] RIP: 0010:nft_hash_walk+0x1d2/0x310 [nf_tables_set] [ 1239.682834] Code: 84 d2 7f 10 4c 89 e7 89 44 24 38 e8 d8 5a 17 e0 8b 44 24 38 48 8d 7b 10 41 0f b6 0c 24 48 89 fa 48 89 fe 48 c1 ea 03 83 e6 07 <42> 0f b6 14 3a 40 38 f2 7f 1a 84 d2 74 16 [ 1239.705108] RSP: 0018:ffff8801118cf358 EFLAGS: 00010246 [ 1239.711115] RAX: 0000000000000000 RBX: 0000000000020400 RCX: 0000000000000001 [ 1239.719269] RDX: 0000000000004082 RSI: 0000000000000000 RDI: 0000000000020410 [ 1239.727401] RBP: ffff880114d5a988 R08: 0000000000007e94 R09: ffff880114dd8030 [ 1239.735530] R10: ffff880114d5a988 R11: ffffed00229bb006 R12: ffff8801118cf4d0 [ 1239.743658] R13: ffff8801118cf4d8 R14: 0000000000000000 R15: dffffc0000000000 [ 1239.751785] FS: 00007f5a8fe0b700(0000) GS:ffff88011b600000(0000) knlGS:0000000000000000 [ 1239.760993] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 1239.767560] CR2: 00007f5a8ecc27b0 CR3: 000000010608e000 CR4: 00000000001006f0 [ 1239.775679] Kernel panic - not syncing: Fatal exception [ 1239.776630] Kernel Offset: 0x1f000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff) [ 1239.776630] Rebooting in 5 seconds.. Fixes: 20a69341f2d0 ("netfilter: nf_tables: add netlink set API") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 4 ++-- net/netfilter/nf_tables_api.c | 2 +- net/netfilter/nft_set_bitmap.c | 6 +++--- net/netfilter/nft_set_hash.c | 8 ++++---- net/netfilter/nft_set_rbtree.c | 4 ++-- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index dc417ef0a0c5..552bfbef1bf1 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -274,7 +274,7 @@ enum nft_set_class { * @space: memory class */ struct nft_set_estimate { - unsigned int size; + u64 size; enum nft_set_class lookup; enum nft_set_class space; }; @@ -336,7 +336,7 @@ struct nft_set_ops { const struct nft_set_elem *elem, unsigned int flags); - unsigned int (*privsize)(const struct nlattr * const nla[], + u64 (*privsize)(const struct nlattr * const nla[], const struct nft_set_desc *desc); bool (*estimate)(const struct nft_set_desc *desc, u32 features, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 67cdd5c4f4f5..3008f93469c4 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3354,7 +3354,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_set *set; struct nft_ctx ctx; char *name; - unsigned int size; + u64 size; u64 timeout; u32 ktype, dtype, flags, policy, gc_int, objtype; struct nft_set_desc desc; diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 128bc16f52dd..f866bd41e5d2 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -248,13 +248,13 @@ static inline u32 nft_bitmap_size(u32 klen) return ((2 << ((klen * BITS_PER_BYTE) - 1)) / BITS_PER_BYTE) << 1; } -static inline u32 nft_bitmap_total_size(u32 klen) +static inline u64 nft_bitmap_total_size(u32 klen) { return sizeof(struct nft_bitmap) + nft_bitmap_size(klen); } -static unsigned int nft_bitmap_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_bitmap_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { u32 klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN])); diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 90c3e7e6cacb..015124e649cb 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -341,8 +341,8 @@ schedule: nft_set_gc_interval(set)); } -static unsigned int nft_rhash_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_rhash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rhash); } @@ -585,8 +585,8 @@ cont: } } -static unsigned int nft_hash_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_hash_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_hash) + nft_hash_buckets(desc->size) * sizeof(struct hlist_head); diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 9873d734b494..55e2d9215c0d 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -411,8 +411,8 @@ static void nft_rbtree_gc(struct work_struct *work) nft_set_gc_interval(set)); } -static unsigned int nft_rbtree_privsize(const struct nlattr * const nla[], - const struct nft_set_desc *desc) +static u64 nft_rbtree_privsize(const struct nlattr * const nla[], + const struct nft_set_desc *desc) { return sizeof(struct nft_rbtree); } -- cgit v1.2.3-70-g09d2 From d209df3e7f7002d9099fdb0f6df0f972b4386a63 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 2 Aug 2018 21:44:40 +0200 Subject: netfilter: nf_tables: fix register ordering We must register nfnetlink ops last, as that exposes nf_tables to userspace. Without this, we could theoretically get nfnetlink request before net->nft state has been initialized. Fixes: 99633ab29b213 ("netfilter: nf_tables: complete net namespace support") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 2 +- net/netfilter/nf_tables_api.c | 29 ++++++++++++++++++++++------- net/netfilter/nft_chain_filter.c | 2 +- 3 files changed, 24 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 552bfbef1bf1..0f39ac487012 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1374,6 +1374,6 @@ struct nft_trans_flowtable { (((struct nft_trans_flowtable *)trans->data)->flowtable) int __init nft_chain_filter_init(void); -void __exit nft_chain_filter_fini(void); +void nft_chain_filter_fini(void); #endif /* _NET_NF_TABLES_H */ diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3008f93469c4..80636cc59686 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -7273,21 +7273,36 @@ static int __init nf_tables_module_init(void) { int err; - nft_chain_filter_init(); + err = register_pernet_subsys(&nf_tables_net_ops); + if (err < 0) + return err; + + err = nft_chain_filter_init(); + if (err < 0) + goto err1; err = nf_tables_core_module_init(); if (err < 0) - return err; + goto err2; - err = nfnetlink_subsys_register(&nf_tables_subsys); + err = register_netdevice_notifier(&nf_tables_flowtable_notifier); if (err < 0) - goto err; + goto err3; - register_netdevice_notifier(&nf_tables_flowtable_notifier); + /* must be last */ + err = nfnetlink_subsys_register(&nf_tables_subsys); + if (err < 0) + goto err4; - return register_pernet_subsys(&nf_tables_net_ops); -err: + return err; +err4: + unregister_netdevice_notifier(&nf_tables_flowtable_notifier); +err3: nf_tables_core_module_exit(); +err2: + nft_chain_filter_fini(); +err1: + unregister_pernet_subsys(&nf_tables_net_ops); return err; } diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index ea5b7c4944f6..9d07b277b9ee 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -392,7 +392,7 @@ int __init nft_chain_filter_init(void) return 0; } -void __exit nft_chain_filter_fini(void) +void nft_chain_filter_fini(void) { nft_chain_filter_bridge_fini(); nft_chain_filter_inet_fini(); -- cgit v1.2.3-70-g09d2 From cdb2f401246ef70b37eeedd9c6f9c28d611d8255 Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Mon, 13 Aug 2018 18:43:36 +0300 Subject: netfilter: uapi: fix linux/netfilter/nf_osf.h userspace compilation errors Move inclusion of and from linux/netfilter/xt_osf.h to linux/netfilter/nf_osf.h to fix the following linux/netfilter/nf_osf.h userspace compilation errors: /usr/include/linux/netfilter/nf_osf.h:59:24: error: 'MAX_IPOPTLEN' undeclared here (not in a function) struct nf_osf_opt opt[MAX_IPOPTLEN]; /usr/include/linux/netfilter/nf_osf.h:64:17: error: field 'ip' has incomplete type struct iphdr ip; /usr/include/linux/netfilter/nf_osf.h:65:18: error: field 'tcp' has incomplete type struct tcphdr tcp; Fixes: bfb15f2a95cb ("netfilter: extract Passive OS fingerprint infrastructure from xt_osf") Signed-off-by: Dmitry V. Levin Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_osf.h | 2 ++ include/uapi/linux/netfilter/xt_osf.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nfnetlink_osf.h b/include/uapi/linux/netfilter/nfnetlink_osf.h index 76a3527df5dd..272bc3195f2d 100644 --- a/include/uapi/linux/netfilter/nfnetlink_osf.h +++ b/include/uapi/linux/netfilter/nfnetlink_osf.h @@ -2,6 +2,8 @@ #define _NF_OSF_H #include +#include +#include #define MAXGENRELEN 32 diff --git a/include/uapi/linux/netfilter/xt_osf.h b/include/uapi/linux/netfilter/xt_osf.h index 24102b5286ec..6e466236ca4b 100644 --- a/include/uapi/linux/netfilter/xt_osf.h +++ b/include/uapi/linux/netfilter/xt_osf.h @@ -21,8 +21,6 @@ #define _XT_OSF_H #include -#include -#include #include #define XT_OSF_GENRE NF_OSF_GENRE -- cgit v1.2.3-70-g09d2 From ff93bca769925a2d8fd7f910cdf543d992e17f07 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 14 Aug 2018 15:21:31 -0700 Subject: ila: make lockdep happy again Previously, alloc_ila_locks() and bucket_table_alloc() call spin_lock_init() separately, therefore they have two different lock names and lock class keys. However, after commit b893281715ab ("ila: Call library function alloc_bucket_locks") they both call helper alloc_bucket_spinlocks() which now only has one lock name and lock class key. This causes a few bogus lockdep warnings as reported by syzbot. Fix this by making alloc_bucket_locks() a macro and pass declaration name as lock name and a static lock class key inside the macro. Fixes: b893281715ab ("ila: Call library function alloc_bucket_locks") Reported-by: Cc: Tom Herbert Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/linux/spinlock.h | 17 ++++++++++++++--- lib/bucket_locks.c | 11 +++++++---- 2 files changed, 21 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3190997df9ca..e089157dcf97 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -451,9 +451,20 @@ extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, #define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) -int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, - size_t max_size, unsigned int cpu_mult, - gfp_t gfp); +int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, + size_t max_size, unsigned int cpu_mult, + gfp_t gfp, const char *name, + struct lock_class_key *key); + +#define alloc_bucket_spinlocks(locks, lock_mask, max_size, cpu_mult, gfp) \ + ({ \ + static struct lock_class_key key; \ + int ret; \ + \ + ret = __alloc_bucket_spinlocks(locks, lock_mask, max_size, \ + cpu_mult, gfp, #locks, &key); \ + ret; \ + }) void free_bucket_spinlocks(spinlock_t *locks); diff --git a/lib/bucket_locks.c b/lib/bucket_locks.c index ade3ce6c4af6..64b92e1dbace 100644 --- a/lib/bucket_locks.c +++ b/lib/bucket_locks.c @@ -11,8 +11,9 @@ * to a power of 2 to be suitable as a hash table. */ -int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, - size_t max_size, unsigned int cpu_mult, gfp_t gfp) +int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, + size_t max_size, unsigned int cpu_mult, gfp_t gfp, + const char *name, struct lock_class_key *key) { spinlock_t *tlocks = NULL; unsigned int i, size; @@ -33,8 +34,10 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, tlocks = kvmalloc_array(size, sizeof(spinlock_t), gfp); if (!tlocks) return -ENOMEM; - for (i = 0; i < size; i++) + for (i = 0; i < size; i++) { spin_lock_init(&tlocks[i]); + lockdep_init_map(&tlocks[i].dep_map, name, key, 0); + } } *locks = tlocks; @@ -42,7 +45,7 @@ int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *locks_mask, return 0; } -EXPORT_SYMBOL(alloc_bucket_spinlocks); +EXPORT_SYMBOL(__alloc_bucket_spinlocks); void free_bucket_spinlocks(spinlock_t *locks) { -- cgit v1.2.3-70-g09d2 From 037b0b86ecf5646f8eae777d8b52ff8b401692ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Aug 2018 21:49:06 +0200 Subject: tcp, ulp: add alias for all ulp modules Lets not turn the TCP ULP lookup into an arbitrary module loader as we only intend to load ULP modules through this mechanism, not other unrelated kernel modules: [root@bar]# cat foo.c #include #include #include #include int main(void) { int sock = socket(PF_INET, SOCK_STREAM, 0); setsockopt(sock, IPPROTO_TCP, TCP_ULP, "sctp", sizeof("sctp")); return 0; } [root@bar]# gcc foo.c -O2 -Wall [root@bar]# lsmod | grep sctp [root@bar]# ./a.out [root@bar]# lsmod | grep sctp sctp 1077248 4 libcrc32c 16384 3 nf_conntrack,nf_nat,sctp [root@bar]# Fix it by adding module alias to TCP ULP modules, so probing module via request_module() will be limited to tcp-ulp-[name]. The existing modules like kTLS will load fine given tcp-ulp-tls alias, but others will fail to load: [root@bar]# lsmod | grep sctp [root@bar]# ./a.out [root@bar]# lsmod | grep sctp [root@bar]# Sockmap is not affected from this since it's either built-in or not. Fixes: 734942cc4ea6 ("tcp: ULP infrastructure") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Acked-by: Song Liu Signed-off-by: Alexei Starovoitov --- include/net/tcp.h | 4 ++++ net/ipv4/tcp_ulp.c | 2 +- net/tls/tls_main.c | 1 + 3 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d196901c9dba..770917d0caa7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -2065,6 +2065,10 @@ int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); +#define MODULE_ALIAS_TCP_ULP(name) \ + __MODULE_INFO(alias, alias_userspace, name); \ + __MODULE_INFO(alias, alias_tcp_ulp, "tcp-ulp-" name) + /* Call BPF_SOCK_OPS program that returns an int. If the return value * is < 0, then the BPF op failed (for example if the loaded BPF * program does not support the chosen operation or there is no BPF diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 622caa4039e0..7dd44b6156c7 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -51,7 +51,7 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) #ifdef CONFIG_MODULES if (!ulp && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); - request_module("%s", name); + request_module("tcp-ulp-%s", name); rcu_read_lock(); ulp = tcp_ulp_find(name); } diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index b09867c8b817..93c0c225ab34 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -45,6 +45,7 @@ MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); +MODULE_ALIAS_TCP_ULP("tls"); enum { TLSV4, -- cgit v1.2.3-70-g09d2 From f6069b9aa9934ede26f41ac0781fce241279ad43 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 17 Aug 2018 23:26:14 +0200 Subject: bpf: fix redirect to map under tail calls Commits 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") and 7c3001313396 ("bpf: fix ri->map_owner pointer on bpf_prog_realloc") tried to mitigate that buggy programs using bpf_redirect_map() helper call do not leave stale maps behind. Idea was to add a map_owner cookie into the per CPU struct redirect_info which was set to prog->aux by the prog making the helper call as a proof that the map is not stale since the prog is implicitly holding a reference to it. This owner cookie could later on get compared with the program calling into BPF whether they match and therefore the redirect could proceed with processing the map safely. In (obvious) hindsight, this approach breaks down when tail calls are involved since the original caller's prog->aux pointer does not have to match the one from one of the progs out of the tail call chain, and therefore the xdp buffer will be dropped instead of redirected. A way around that would be to fix the issue differently (which also allows to remove related work in fast path at the same time): once the life-time of a redirect map has come to its end we use it's map free callback where we need to wait on synchronize_rcu() for current outstanding xdp buffers and remove such a map pointer from the redirect info if found to be present. At that time no program is using this map anymore so we simply invalidate the map pointers to NULL iff they previously pointed to that instance while making sure that the redirect path only reads out the map once. Fixes: 97f91a7cf04f ("bpf: add bpf_redirect_map helper routine") Fixes: 109980b894e9 ("bpf: don't select potentially stale ri->map from buggy xdp progs") Reported-by: Sebastiano Miano Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- include/linux/filter.h | 3 +- include/trace/events/xdp.h | 5 ++-- kernel/bpf/cpumap.c | 2 ++ kernel/bpf/devmap.c | 1 + kernel/bpf/verifier.c | 21 -------------- kernel/bpf/xskmap.c | 1 + net/core/filter.c | 68 ++++++++++++++++++++-------------------------- 7 files changed, 38 insertions(+), 63 deletions(-) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 5d565c50bcb2..6791a0ac0139 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -543,7 +543,6 @@ struct bpf_redirect_info { u32 flags; struct bpf_map *map; struct bpf_map *map_to_flush; - unsigned long map_owner; u32 kern_flags; }; @@ -781,6 +780,8 @@ static inline bool bpf_dump_raw_ok(void) struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_clear_redirect_map(struct bpf_map *map); + static inline bool xdp_return_frame_no_direct(void) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index 1ecf4c67fcf7..e95cb86b65cf 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -147,9 +147,8 @@ struct _bpf_dtab_netdev { #define devmap_ifindex(fwd, map) \ (!fwd ? 0 : \ - (!map ? 0 : \ - ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ - ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0))) + ((map->map_type == BPF_MAP_TYPE_DEVMAP) ? \ + ((struct _bpf_dtab_netdev *)fwd)->dev->ifindex : 0)) #define _trace_xdp_redirect_map(dev, xdp, fwd, map, idx) \ trace_xdp_redirect_map(dev, xdp, devmap_ifindex(fwd, map), \ diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index 620bc5024d7d..24aac0d0f412 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -479,6 +479,8 @@ static void cpu_map_free(struct bpf_map *map) * It does __not__ ensure pending flush operations (if any) are * complete. */ + + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index ac1df79f3788..141710b82a6c 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -161,6 +161,7 @@ static void dev_map_free(struct bpf_map *map) list_del_rcu(&dtab->list); spin_unlock(&dev_map_lock); + bpf_clear_redirect_map(map); synchronize_rcu(); /* To ensure all pending flush operations have completed wait for flush diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ca90679a7fe5..92246117d2b0 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5844,27 +5844,6 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) goto patch_call_imm; } - if (insn->imm == BPF_FUNC_redirect_map) { - /* Note, we cannot use prog directly as imm as subsequent - * rewrites would still change the prog pointer. The only - * stable address we can use is aux, which also works with - * prog clones during blinding. - */ - u64 addr = (unsigned long)prog->aux; - struct bpf_insn r4_ld[] = { - BPF_LD_IMM64(BPF_REG_4, addr), - *insn, - }; - cnt = ARRAY_SIZE(r4_ld); - - new_prog = bpf_patch_insn_data(env, i + delta, r4_ld, cnt); - if (!new_prog) - return -ENOMEM; - - delta += cnt - 1; - env->prog = prog = new_prog; - insn = new_prog->insnsi + i + delta; - } patch_call_imm: fn = env->ops->get_func_proto(insn->imm, env->prog); /* all functions that have prototype and verifier allowed diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 4ddf61e158f6..9f8463afda9c 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -75,6 +75,7 @@ static void xsk_map_free(struct bpf_map *map) struct xsk_map *m = container_of(map, struct xsk_map, map); int i; + bpf_clear_redirect_map(map); synchronize_net(); for (i = 0; i < map->max_entries; i++) { diff --git a/net/core/filter.c b/net/core/filter.c index fd423ce3da34..c25eb36f1320 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3246,31 +3246,33 @@ static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) } } -static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, - unsigned long aux) +void bpf_clear_redirect_map(struct bpf_map *map) { - return (unsigned long)xdp_prog->aux != aux; + struct bpf_redirect_info *ri; + int cpu; + + for_each_possible_cpu(cpu) { + ri = per_cpu_ptr(&bpf_redirect_info, cpu); + /* Avoid polluting remote cacheline due to writes if + * not needed. Once we pass this test, we need the + * cmpxchg() to make sure it hasn't been changed in + * the meantime by remote CPU. + */ + if (unlikely(READ_ONCE(ri->map) == map)) + cmpxchg(&ri->map, map, NULL); + } } static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; - - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } + WRITE_ONCE(ri->map, NULL); fwd = __xdp_map_lookup_elem(map, index); if (!fwd) { @@ -3296,12 +3298,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); struct net_device *fwd; u32 index = ri->ifindex; int err; - if (ri->map) - return xdp_do_redirect_map(dev, xdp, xdp_prog); + if (map) + return xdp_do_redirect_map(dev, xdp, xdp_prog, map); fwd = dev_get_by_index_rcu(dev_net(dev), index); ri->ifindex = 0; @@ -3325,24 +3328,17 @@ EXPORT_SYMBOL_GPL(xdp_do_redirect); static int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) + struct bpf_prog *xdp_prog, + struct bpf_map *map) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - unsigned long map_owner = ri->map_owner; - struct bpf_map *map = ri->map; u32 index = ri->ifindex; void *fwd = NULL; int err = 0; ri->ifindex = 0; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); - if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { - err = -EFAULT; - map = NULL; - goto err; - } fwd = __xdp_map_lookup_elem(map, index); if (unlikely(!fwd)) { err = -EINVAL; @@ -3379,13 +3375,14 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, struct xdp_buff *xdp, struct bpf_prog *xdp_prog) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); + struct bpf_map *map = READ_ONCE(ri->map); u32 index = ri->ifindex; struct net_device *fwd; int err = 0; - if (ri->map) - return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog); - + if (map) + return xdp_do_generic_redirect_map(dev, skb, xdp, xdp_prog, + map); ri->ifindex = 0; fwd = dev_get_by_index_rcu(dev_net(dev), index); if (unlikely(!fwd)) { @@ -3416,8 +3413,7 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) ri->ifindex = ifindex; ri->flags = flags; - ri->map = NULL; - ri->map_owner = 0; + WRITE_ONCE(ri->map, NULL); return XDP_REDIRECT; } @@ -3430,8 +3426,8 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, - unsigned long, map_owner) +BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, + u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); @@ -3440,15 +3436,11 @@ BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags ri->ifindex = ifindex; ri->flags = flags; - ri->map = map; - ri->map_owner = map_owner; + WRITE_ONCE(ri->map, map); return XDP_REDIRECT; } -/* Note, arg4 is hidden from users and populated by the verifier - * with the right pointer. - */ static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { .func = bpf_xdp_redirect_map, .gpl_only = false, -- cgit v1.2.3-70-g09d2