From c51d39010a1bccc9c1294e2d7c00005aefeb2b5c Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Tue, 15 Nov 2016 15:08:25 +0100 Subject: netfilter: conntrack: built-in support for DCCP CONFIG_NF_CT_PROTO_DCCP is no more a tristate. When set to y, connection tracking support for DCCP protocol is built-in into nf_conntrack.ko. footprint test: $ ls -l net/netfilter/nf_conntrack{_proto_dccp,}.ko \ net/ipv4/netfilter/nf_conntrack_ipv4.ko \ net/ipv6/netfilter/nf_conntrack_ipv6.ko (builtin)|| dccp | ipv4 | ipv6 | nf_conntrack ---------++--------+--------+--------+-------------- none || 469140 | 828755 | 828676 | 6141434 DCCP || - | 830566 | 829935 | 6533526 Signed-off-by: Davide Caratti Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_dccp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_dccp.h b/include/linux/netfilter/nf_conntrack_dccp.h index 40dcc82058d1..ff721d7325cf 100644 --- a/include/linux/netfilter/nf_conntrack_dccp.h +++ b/include/linux/netfilter/nf_conntrack_dccp.h @@ -25,7 +25,7 @@ enum ct_dccp_roles { #define CT_DCCP_ROLE_MAX (__CT_DCCP_ROLE_MAX - 1) #ifdef __KERNEL__ -#include +#include struct nf_ct_dccp { u_int8_t role[IP_CT_DIR_MAX]; -- cgit v1.2.3-70-g09d2 From 0aa8c57a04907a5d02068ff9f917629be97ea78d Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:44 -0500 Subject: netfilter: introduce accessor functions for hook entries This allows easier future refactoring. Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 27 +++++++++++++++++++++++++++ net/bridge/br_netfilter_hooks.c | 2 +- net/netfilter/core.c | 10 ++++------ net/netfilter/nf_queue.c | 5 ++--- 4 files changed, 34 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 69230140215b..575aa198097e 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -79,6 +79,33 @@ struct nf_hook_entry { const struct nf_hook_ops *orig_ops; }; +static inline void +nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) +{ + entry->next = NULL; + entry->ops = *ops; + entry->orig_ops = ops; +} + +static inline int +nf_hook_entry_priority(const struct nf_hook_entry *entry) +{ + return entry->ops.priority; +} + +static inline int +nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, + struct nf_hook_state *state) +{ + return entry->ops.hook(entry->ops.priv, skb, state); +} + +static inline const struct nf_hook_ops * +nf_hook_entry_ops(const struct nf_hook_entry *entry) +{ + return entry->orig_ops; +} + static inline void nf_hook_state_init(struct nf_hook_state *p, unsigned int hook, u_int8_t pf, diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 83d937f4415e..adad2eed29e6 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1010,7 +1010,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) + while (elem && (nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF)) elem = rcu_dereference(elem->next); if (!elem) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index de30e08d58f2..2bb46e2d8d30 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -102,15 +102,13 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (!entry) return -ENOMEM; - entry->orig_ops = reg; - entry->ops = *reg; - entry->next = NULL; + nf_hook_entry_init(entry, reg); mutex_lock(&nf_hook_mutex); /* Find the spot in the list */ while ((p = nf_entry_dereference(*pp)) != NULL) { - if (reg->priority < p->orig_ops->priority) + if (reg->priority < nf_hook_entry_priority(p)) break; pp = &p->next; } @@ -140,7 +138,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) mutex_lock(&nf_hook_mutex); while ((p = nf_entry_dereference(*pp)) != NULL) { - if (p->orig_ops == reg) { + if (nf_hook_entry_ops(p) == reg) { rcu_assign_pointer(*pp, p->next); break; } @@ -311,7 +309,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, int ret; do { - verdict = entry->ops.hook(entry->ops.priv, skb, state); + verdict = nf_hook_entry_hookfn(entry, skb, state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: entry = rcu_dereference(entry->next); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 77cba9f6ccb6..4a7662486f44 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -185,7 +185,7 @@ static unsigned int nf_iterate(struct sk_buff *skb, do { repeat: - verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); + verdict = nf_hook_entry_hookfn((*entryp), skb, state); if (verdict != NF_ACCEPT) { if (verdict != NF_REPEAT) return verdict; @@ -200,7 +200,6 @@ repeat: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { struct nf_hook_entry *hook_entry = entry->hook; - struct nf_hook_ops *elem = &hook_entry->ops; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; int err; @@ -209,7 +208,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) - verdict = elem->hook(elem->priv, skb, &entry->state); + verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); if (verdict == NF_ACCEPT) { afinfo = nf_get_afinfo(entry->state.pf); -- cgit v1.2.3-70-g09d2 From d415b9eb76fc55c03ef5451691170aa5771dcea3 Mon Sep 17 00:00:00 2001 From: Aaron Conole Date: Tue, 15 Nov 2016 17:48:45 -0500 Subject: netfilter: decouple nf_hook_entry and nf_hook_ops During nfhook traversal we only need a very small subset of nf_hook_ops members. We need: - next element - hook function to call - hook function priv argument Bridge netfilter also needs 'thresh'; can be obtained via ->orig_ops. nf_hook_entry struct is now 32 bytes on x86_64. A followup patch will turn the run-time list into an array that only stores hook functions plus their priv arguments, eliminating the ->next element. Suggested-by: Florian Westphal Signed-off-by: Aaron Conole Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 575aa198097e..a4b97be30b28 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -75,7 +75,8 @@ struct nf_hook_ops { struct nf_hook_entry { struct nf_hook_entry __rcu *next; - struct nf_hook_ops ops; + nf_hookfn *hook; + void *priv; const struct nf_hook_ops *orig_ops; }; @@ -83,21 +84,22 @@ static inline void nf_hook_entry_init(struct nf_hook_entry *entry, const struct nf_hook_ops *ops) { entry->next = NULL; - entry->ops = *ops; + entry->hook = ops->hook; + entry->priv = ops->priv; entry->orig_ops = ops; } static inline int nf_hook_entry_priority(const struct nf_hook_entry *entry) { - return entry->ops.priority; + return entry->orig_ops->priority; } static inline int nf_hook_entry_hookfn(const struct nf_hook_entry *entry, struct sk_buff *skb, struct nf_hook_state *state) { - return entry->ops.hook(entry->ops.priv, skb, state); + return entry->hook(entry->priv, skb, state); } static inline const struct nf_hook_ops * -- cgit v1.2.3-70-g09d2 From 4d31eef5176df06f218201bc9c0ce40babb41660 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:17 +0100 Subject: netfilter: x_tables: pass xt_counters struct instead of packet counter On SMP we overload the packet counter (unsigned long) to contain percpu offset. Hide this from callers and pass xt_counters address instead. Preparation patch to allocate the percpu counters in page-sized batch chunks. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 6 +----- net/ipv4/netfilter/arp_tables.c | 4 ++-- net/ipv4/netfilter/ip_tables.c | 4 ++-- net/ipv6/netfilter/ip6_tables.c | 5 ++--- net/netfilter/x_tables.c | 9 +++++++++ 5 files changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index cd4eaf8df445..6e61edeb68e3 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -430,11 +430,7 @@ static inline unsigned long xt_percpu_counter_alloc(void) return 0; } -static inline void xt_percpu_counter_free(u64 pcnt) -{ - if (nr_cpu_ids > 1) - free_percpu((void __percpu *) (unsigned long) pcnt); -} +void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * xt_get_this_cpu_counter(struct xt_counters *cnt) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 848a0704b28f..019f8e8dda6d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -439,7 +439,7 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) err: module_put(t->u.kernel.target->me); out: - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -519,7 +519,7 @@ static inline void cleanup_entry(struct arpt_entry *e) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 46815c8a60d7..acc9a0c45bdf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -582,7 +582,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, cleanup_match(ematch, net); } - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -670,7 +670,7 @@ cleanup_entry(struct ipt_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 6ff42b8301cc..88b56a98905b 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -612,7 +612,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, cleanup_match(ematch, net); } - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); return ret; } @@ -699,8 +699,7 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); - - xt_percpu_counter_free(e->counters.pcnt); + xt_percpu_counter_free(&e->counters); } /* Checks and translates the user-supplied table segment (held in diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index ad818e52859b..0580029eb0ee 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1615,6 +1615,15 @@ void xt_proto_fini(struct net *net, u_int8_t af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +void xt_percpu_counter_free(struct xt_counters *counters) +{ + unsigned long pcnt = counters->pcnt; + + if (nr_cpu_ids > 1) + free_percpu((void __percpu *)pcnt); +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_free); + static int __net_init xt_net_init(struct net *net) { int i; -- cgit v1.2.3-70-g09d2 From f28e15bacedd444608e25421c72eb2cf4527c9ca Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:18 +0100 Subject: netfilter: x_tables: pass xt_counters struct to counter allocator Keeps some noise away from a followup patch. Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 27 +-------------------------- net/ipv4/netfilter/arp_tables.c | 5 +---- net/ipv4/netfilter/ip_tables.c | 5 +---- net/ipv6/netfilter/ip6_tables.c | 5 +---- net/netfilter/x_tables.c | 30 ++++++++++++++++++++++++++++++ 5 files changed, 34 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 6e61edeb68e3..05a94bd32c55 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -404,32 +404,7 @@ static inline unsigned long ifname_compare_aligned(const char *_a, } -/* On SMP, ip(6)t_entry->counters.pcnt holds address of the - * real (percpu) counter. On !SMP, its just the packet count, - * so nothing needs to be done there. - * - * xt_percpu_counter_alloc returns the address of the percpu - * counter, or 0 on !SMP. We force an alignment of 16 bytes - * so that bytes/packets share a common cache line. - * - * Hence caller must use IS_ERR_VALUE to check for error, this - * allows us to return 0 for single core systems without forcing - * callers to deal with SMP vs. NONSMP issues. - */ -static inline unsigned long xt_percpu_counter_alloc(void) -{ - if (nr_cpu_ids > 1) { - void __percpu *res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - - if (res == NULL) - return -ENOMEM; - - return (__force unsigned long) res; - } - - return 0; -} +bool xt_percpu_counter_alloc(struct xt_counters *counters); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 019f8e8dda6d..808deb275ceb 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -415,13 +415,10 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { struct xt_entry_target *t; struct xt_target *target; - unsigned long pcnt; int ret; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(&e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; t = arpt_get_target(e); target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index acc9a0c45bdf..a48430d3420f 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -539,12 +539,9 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - unsigned long pcnt; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(&e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; j = 0; mtpar.net = net; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 88b56a98905b..a5a92083fd62 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -570,12 +570,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, unsigned int j; struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - unsigned long pcnt; - pcnt = xt_percpu_counter_alloc(); - if (IS_ERR_VALUE(pcnt)) + if (!xt_percpu_counter_alloc(&e->counters)) return -ENOMEM; - e->counters.pcnt = pcnt; j = 0; mtpar.net = net; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 0580029eb0ee..be5e83047594 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1615,6 +1615,36 @@ void xt_proto_fini(struct net *net, u_int8_t af) } EXPORT_SYMBOL_GPL(xt_proto_fini); +/** + * xt_percpu_counter_alloc - allocate x_tables rule counter + * + * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct + * + * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then + * contain the address of the real (percpu) counter. + * + * Rule evaluation needs to use xt_get_this_cpu_counter() helper + * to fetch the real percpu counter. + * + * returns false on error. + */ +bool xt_percpu_counter_alloc(struct xt_counters *counter) +{ + void __percpu *res; + + if (nr_cpu_ids <= 1) + return true; + + res = __alloc_percpu(sizeof(struct xt_counters), + sizeof(struct xt_counters)); + if (!res) + return false; + + counter->pcnt = (__force unsigned long)res; + return true; +} +EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc); + void xt_percpu_counter_free(struct xt_counters *counters) { unsigned long pcnt = counters->pcnt; -- cgit v1.2.3-70-g09d2 From ae0ac0ed6fcf5af3be0f63eb935f483f44a402d2 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 22 Nov 2016 14:44:19 +0100 Subject: netfilter: x_tables: pack percpu counter allocations instead of allocating each xt_counter individually, allocate 4k chunks and then use these for counter allocation requests. This should speed up rule evaluation by increasing data locality, also speeds up ruleset loading because we reduce calls to the percpu allocator. As Eric points out we can't use PAGE_SIZE, page_allocator would fail on arches with 64k page size. Suggested-by: Eric Dumazet Signed-off-by: Florian Westphal Acked-by: Eric Dumazet Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 7 ++++++- net/ipv4/netfilter/arp_tables.c | 9 ++++++--- net/ipv4/netfilter/ip_tables.c | 9 ++++++--- net/ipv6/netfilter/ip6_tables.c | 9 ++++++--- net/netfilter/x_tables.c | 33 ++++++++++++++++++++++++--------- 5 files changed, 48 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 05a94bd32c55..5117e4d2ddfa 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -403,8 +403,13 @@ static inline unsigned long ifname_compare_aligned(const char *_a, return ret; } +struct xt_percpu_counter_alloc_state { + unsigned int off; + const char __percpu *mem; +}; -bool xt_percpu_counter_alloc(struct xt_counters *counters); +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter); void xt_percpu_counter_free(struct xt_counters *cnt); static inline struct xt_counters * diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 808deb275ceb..1258a9ab62ef 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -411,13 +411,14 @@ static inline int check_target(struct arpt_entry *e, const char *name) } static inline int -find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; int ret; - if (!xt_percpu_counter_alloc(&e->counters)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; t = arpt_get_target(e); @@ -525,6 +526,7 @@ static inline void cleanup_entry(struct arpt_entry *e) static int translate_table(struct xt_table_info *newinfo, void *entry0, const struct arpt_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct arpt_entry *iter; unsigned int *offsets; unsigned int i; @@ -587,7 +589,8 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, repl->name, repl->size); + ret = find_check_entry(iter, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index a48430d3420f..308b456723f0 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -531,7 +531,8 @@ static int check_target(struct ipt_entry *e, struct net *net, const char *name) static int find_check_entry(struct ipt_entry *e, struct net *net, const char *name, - unsigned int size) + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; @@ -540,7 +541,7 @@ find_check_entry(struct ipt_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - if (!xt_percpu_counter_alloc(&e->counters)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; @@ -676,6 +677,7 @@ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ipt_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ipt_entry *iter; unsigned int *offsets; unsigned int i; @@ -735,7 +737,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, repl->name, repl->size); + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index a5a92083fd62..d56d8ac09a94 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -562,7 +562,8 @@ static int check_target(struct ip6t_entry *e, struct net *net, const char *name) static int find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, - unsigned int size) + unsigned int size, + struct xt_percpu_counter_alloc_state *alloc_state) { struct xt_entry_target *t; struct xt_target *target; @@ -571,7 +572,7 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; - if (!xt_percpu_counter_alloc(&e->counters)) + if (!xt_percpu_counter_alloc(alloc_state, &e->counters)) return -ENOMEM; j = 0; @@ -705,6 +706,7 @@ static int translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, const struct ip6t_replace *repl) { + struct xt_percpu_counter_alloc_state alloc_state = { 0 }; struct ip6t_entry *iter; unsigned int *offsets; unsigned int i; @@ -764,7 +766,8 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, /* Finally, each sanity check must pass */ i = 0; xt_entry_foreach(iter, entry0, newinfo->size) { - ret = find_check_entry(iter, net, repl->name, repl->size); + ret = find_check_entry(iter, net, repl->name, repl->size, + &alloc_state); if (ret != 0) break; ++i; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index be5e83047594..f6ce4a7036e6 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -40,6 +40,7 @@ MODULE_AUTHOR("Harald Welte "); MODULE_DESCRIPTION("{ip,ip6,arp,eb}_tables backend module"); #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1)) +#define XT_PCPU_BLOCK_SIZE 4096 struct compat_delta { unsigned int offset; /* offset in kernel */ @@ -1618,6 +1619,7 @@ EXPORT_SYMBOL_GPL(xt_proto_fini); /** * xt_percpu_counter_alloc - allocate x_tables rule counter * + * @state: pointer to xt_percpu allocation state * @counter: pointer to counter struct inside the ip(6)/arpt_entry struct * * On SMP, the packet counter [ ip(6)t_entry->counters.pcnt ] will then @@ -1626,21 +1628,34 @@ EXPORT_SYMBOL_GPL(xt_proto_fini); * Rule evaluation needs to use xt_get_this_cpu_counter() helper * to fetch the real percpu counter. * + * To speed up allocation and improve data locality, a 4kb block is + * allocated. + * + * xt_percpu_counter_alloc_state contains the base address of the + * allocated page and the current sub-offset. + * * returns false on error. */ -bool xt_percpu_counter_alloc(struct xt_counters *counter) +bool xt_percpu_counter_alloc(struct xt_percpu_counter_alloc_state *state, + struct xt_counters *counter) { - void __percpu *res; + BUILD_BUG_ON(XT_PCPU_BLOCK_SIZE < (sizeof(*counter) * 2)); if (nr_cpu_ids <= 1) return true; - res = __alloc_percpu(sizeof(struct xt_counters), - sizeof(struct xt_counters)); - if (!res) - return false; - - counter->pcnt = (__force unsigned long)res; + if (!state->mem) { + state->mem = __alloc_percpu(XT_PCPU_BLOCK_SIZE, + XT_PCPU_BLOCK_SIZE); + if (!state->mem) + return false; + } + counter->pcnt = (__force unsigned long)(state->mem + state->off); + state->off += sizeof(*counter); + if (state->off > (XT_PCPU_BLOCK_SIZE - sizeof(*counter))) { + state->mem = NULL; + state->off = 0; + } return true; } EXPORT_SYMBOL_GPL(xt_percpu_counter_alloc); @@ -1649,7 +1664,7 @@ void xt_percpu_counter_free(struct xt_counters *counters) { unsigned long pcnt = counters->pcnt; - if (nr_cpu_ids > 1) + if (nr_cpu_ids > 1 && (pcnt & (XT_PCPU_BLOCK_SIZE - 1)) == 0) free_percpu((void __percpu *)pcnt); } EXPORT_SYMBOL_GPL(xt_percpu_counter_free); -- cgit v1.2.3-70-g09d2 From df122f58b834b24c27d7e2ac02a4910d3e56f6ae Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 28 Nov 2016 11:40:05 +0100 Subject: netfilter: ingress: translate 0 nf_hook_slow retval to -1 The caller assumes that < 0 means that skb was stolen (or free'd). All other return values continue skb processing. nf_hook_slow returns 3 different return value types: A) a (negative) errno value: the skb was dropped (NF_DROP, e.g. by iptables '-j DROP' rule). B) 0. The skb was stolen by the hook or queued to userspace. C) 1. all hooks returned NF_ACCEPT so the caller should invoke the okfn so packet processing can continue. nft ingress facility currently doesn't have the 'okfn' that the NF_HOOK() macros use; there is no nfqueue support either. So 1 means that nf_hook_ingress() caller should go on processing the skb. In order to allow use of NF_STOLEN from ingress we need to translate this to an errno number, else we'd crash because we continue with already-free'd (or about to be free-d) skb. The errno value isn't checked, its just important that its less than 0, so return -1. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 2dc3b49b804a..59476061de86 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,6 +19,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) { struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; + int ret; /* Must recheck the ingress hook head, in the event it became NULL * after the check in nf_hook_ingress_active evaluated to true. @@ -29,7 +30,11 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, NF_NETDEV_INGRESS, NFPROTO_NETDEV, skb->dev, NULL, NULL, dev_net(skb->dev), NULL); - return nf_hook_slow(skb, &state, e); + ret = nf_hook_slow(skb, &state, e); + if (ret == 0) + return -1; + + return ret; } static inline void nf_hook_ingress_init(struct net_device *dev) -- cgit v1.2.3-70-g09d2