diff options
author | David S. Miller <davem@davemloft.net> | 2014-03-17 15:06:24 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-17 15:06:24 -0400 |
commit | e86e180b824e00733bd0e499d412a595078f9b51 (patch) | |
tree | ebda350b99785b4d0dd0188dd28fa17ec8135474 /include | |
parent | e7ef085d0a9dc1cc72e7d8108ed3b4e1a5e8d938 (diff) | |
parent | 7d08487777c8b30dea34790734d708470faaf1e5 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says:
====================
Netfilter/IPVS updates for net-next
The following patchset contains Netfilter/IPVS updates for net-next,
most relevantly they are:
* cleanup to remove double semicolon from stephen hemminger.
* calm down sparse warning in xt_ipcomp, from Fan Du.
* nf_ct_labels support for nf_tables, from Florian Westphal.
* new macros to simplify rcu dereferences in the scope of nfnetlink
and nf_tables, from Patrick McHardy.
* Accept queue and drop (including reason for drop) to verdict
parsing in nf_tables, also from Patrick.
* Remove unused random seed initialization in nfnetlink_log, from
Florian Westphal.
* Allow to attach user-specific information to nf_tables rules, useful
to attach user comments to rule, from me.
* Return errors in ipset according to the manpage documentation, from
Jozsef Kadlecsik.
* Fix coccinelle warnings related to incorrect bool type usage for ipset,
from Fengguang Wu.
* Add hash:ip,mark set type to ipset, from Vytas Dauksa.
* Fix message for each spotted by ipset for each netns that is created,
from Ilia Mirkin.
* Add forceadd option to ipset, which evicts a random entry from the set
if it becomes full, from Josh Hunt.
* Minor IPVS cleanups and fixes from Andi Kleen and Tingwei Liu.
* Improve conntrack scalability by removing a central spinlock, original
work from Eric Dumazet. Jesper Dangaard Brouer took them over to address
remaining issues. Several patches to prepare this change come in first
place.
* Rework nft_hash to resolve bugs (leaking chain, missing rcu synchronization
on element removal, etc. from Patrick McHardy.
* Restore context in the rule deletion path, as we now release rule objects
synchronously, from Patrick McHardy. This gets back event notification for
anonymous sets.
* Fix NAT family validation in nft_nat, also from Patrick.
* Improve scalability of xt_connlimit by using an array of spinlocks and
by introducing a rb-tree of hashtables for faster lookup of accounted
objects per network. This patch was preceded by several patches and
refactorizations to accomodate this change including the use of kmem_cache,
from Florian Westphal.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/netfilter/ipset/ip_set.h | 15 | ||||
-rw-r--r-- | include/linux/netfilter/nfnetlink.h | 21 | ||||
-rw-r--r-- | include/net/netfilter/nf_conntrack.h | 11 | ||||
-rw-r--r-- | include/net/netfilter/nf_conntrack_core.h | 9 | ||||
-rw-r--r-- | include/net/netfilter/nf_conntrack_labels.h | 4 | ||||
-rw-r--r-- | include/net/netfilter/nf_tables.h | 28 | ||||
-rw-r--r-- | include/net/netns/conntrack.h | 13 | ||||
-rw-r--r-- | include/uapi/linux/netfilter/ipset/ip_set.h | 12 | ||||
-rw-r--r-- | include/uapi/linux/netfilter/nf_tables.h | 6 |
9 files changed, 96 insertions, 23 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0c7d01eae56c..96afc29184be 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -39,11 +39,13 @@ enum ip_set_feature { IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG), IPSET_TYPE_IFACE_FLAG = 5, IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG), - IPSET_TYPE_NOMATCH_FLAG = 6, + IPSET_TYPE_MARK_FLAG = 6, + IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG), + IPSET_TYPE_NOMATCH_FLAG = 7, IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG), /* Strictly speaking not a feature, but a flag for dumping: * this settype must be dumped last */ - IPSET_DUMP_LAST_FLAG = 7, + IPSET_DUMP_LAST_FLAG = 8, IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG), }; @@ -63,6 +65,7 @@ enum ip_set_extension { #define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT) #define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER) #define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT) +#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD) /* Extension id, in size order */ enum ip_set_ext_id { @@ -171,8 +174,6 @@ struct ip_set_type { char name[IPSET_MAXNAMELEN]; /* Protocol version */ u8 protocol; - /* Set features to control swapping */ - u8 features; /* Set type dimension */ u8 dimension; /* @@ -182,6 +183,8 @@ struct ip_set_type { u8 family; /* Type revisions */ u8 revision_min, revision_max; + /* Set features to control swapping */ + u16 features; /* Create set */ int (*create)(struct net *net, struct ip_set *set, @@ -217,6 +220,8 @@ struct ip_set { u8 revision; /* Extensions */ u8 extensions; + /* Create flags */ + u8 flags; /* Default timeout value, if enabled */ u32 timeout; /* Element data size */ @@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set) cadt_flags |= IPSET_FLAG_WITH_COUNTERS; if (SET_WITH_COMMENT(set)) cadt_flags |= IPSET_FLAG_WITH_COMMENT; + if (SET_WITH_FORCEADD(set)) + cadt_flags |= IPSET_FLAG_WITH_FORCEADD; if (!cadt_flags) return 0; diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 28c74367e900..e955d4730625 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); +#ifdef CONFIG_PROVE_LOCKING +int lockdep_nfnl_is_held(__u8 subsys_id); +#else +static inline int lockdep_nfnl_is_held(__u8 subsys_id) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +/* + * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex + * + * @p: The pointer to read, prior to dereferencing + * @ss: The nfnetlink subsystem ID + * + * Return the value of the specified RCU-protected pointer, but omit + * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * caller holds the NFNL subsystem mutex. + */ +#define nfnl_dereference(p, ss) \ + rcu_dereference_protected(p, lockdep_nfnl_is_held(ss)) #define MODULE_ALIAS_NFNL_SUBSYS(subsys) \ MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys)) diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index b2ac6246b7e0..37252f71a380 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -73,10 +73,17 @@ struct nf_conn_help { struct nf_conn { /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, - plus 1 for any connection(s) we are `master' for */ + * plus 1 for any connection(s) we are `master' for + * + * Hint, SKB address this struct and refcnt via skb->nfct and + * helpers nf_conntrack_get() and nf_conntrack_put(). + * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, + * beware nf_ct_get() is different and don't inc refcnt. + */ struct nf_conntrack ct_general; - spinlock_t lock; + spinlock_t lock; + u16 cpu; /* XXX should I move this to the tail ? - Y.K */ /* These are my tuples; original and reply */ diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 15308b8eb5b5..cc0c18827602 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *proto); -extern spinlock_t nf_conntrack_lock ; +#ifdef CONFIG_LOCKDEP +# define CONNTRACK_LOCKS 8 +#else +# define CONNTRACK_LOCKS 1024 +#endif +extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; + +extern spinlock_t nf_conntrack_expect_lock; #endif /* _NF_CONNTRACK_CORE_H */ diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h index c985695283b3..dec6336bf850 100644 --- a/include/net/netfilter/nf_conntrack_labels.h +++ b/include/net/netfilter/nf_conntrack_labels.h @@ -7,6 +7,8 @@ #include <uapi/linux/netfilter/xt_connlabel.h> +#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE) + struct nf_conn_labels { u8 words; unsigned long bits[]; @@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct) u8 words; words = ACCESS_ONCE(net->ct.label_words); - if (words == 0 || WARN_ON_ONCE(words > 8)) + if (words == 0) return NULL; cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index e7e14ffe0f6a..e6bc14d8fa9a 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -3,6 +3,7 @@ #include <linux/list.h> #include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter/nf_tables.h> #include <net/netlink.h> @@ -288,7 +289,8 @@ struct nft_expr_ops { int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); - void (*destroy)(const struct nft_expr *expr); + void (*destroy)(const struct nft_ctx *ctx, + const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, const struct nft_expr *expr); int (*validate)(const struct nft_ctx *ctx, @@ -325,13 +327,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr) * @handle: rule handle * @genmask: generation mask * @dlen: length of expression data + * @ulen: length of user data (used for comments) * @data: expression data */ struct nft_rule { struct list_head list; - u64 handle:46, + u64 handle:42, genmask:2, - dlen:16; + dlen:12, + ulen:8; unsigned char data[] __attribute__((aligned(__alignof__(struct nft_expr)))); }; @@ -340,19 +344,13 @@ struct nft_rule { * struct nft_rule_trans - nf_tables rule update in transaction * * @list: used internally + * @ctx: rule context * @rule: rule that needs to be updated - * @chain: chain that this rule belongs to - * @table: table for which this chain applies - * @nlh: netlink header of the message that contain this update - * @family: family expressesed as AF_* */ struct nft_rule_trans { struct list_head list; + struct nft_ctx ctx; struct nft_rule *rule; - const struct nft_chain *chain; - const struct nft_table *table; - const struct nlmsghdr *nlh; - u8 family; }; static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule) @@ -370,6 +368,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule) return (struct nft_expr *)&rule->data[rule->dlen]; } +static inline void *nft_userdata(const struct nft_rule *rule) +{ + return (void *)&rule->data[rule->dlen]; +} + /* * The last pointer isn't really necessary, but the compiler isn't able to * determine that the result of nft_expr_last() is always the same since it @@ -521,6 +524,9 @@ void nft_unregister_chain_type(const struct nf_chain_type *); int nft_register_expr(struct nft_expr_type *); void nft_unregister_expr(struct nft_expr_type *); +#define nft_dereference(p) \ + nfnl_dereference(p, NFNL_SUBSYS_NFTABLES) + #define MODULE_ALIAS_NFT_FAMILY(family) \ MODULE_ALIAS("nft-afinfo-" __stringify(family)) diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index fbcc7fa536dc..773cce308bc6 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -5,6 +5,7 @@ #include <linux/list_nulls.h> #include <linux/atomic.h> #include <linux/netfilter/nf_conntrack_tcp.h> +#include <linux/seqlock.h> struct ctl_table_header; struct nf_conntrack_ecache; @@ -62,6 +63,13 @@ struct nf_ip_net { #endif }; +struct ct_pcpu { + spinlock_t lock; + struct hlist_nulls_head unconfirmed; + struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; +}; + struct netns_ct { atomic_t count; unsigned int expect_count; @@ -83,12 +91,11 @@ struct netns_ct { int sysctl_checksum; unsigned int htable_size; + seqcount_t generation; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; - struct hlist_nulls_head unconfirmed; - struct hlist_nulls_head dying; - struct hlist_nulls_head tmpl; + struct ct_pcpu __percpu *pcpu_lists; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h index 25d3b2f79c02..78c2f2e79920 100644 --- a/include/uapi/linux/netfilter/ipset/ip_set.h +++ b/include/uapi/linux/netfilter/ipset/ip_set.h @@ -82,6 +82,8 @@ enum { IPSET_ATTR_PROTO, /* 7 */ IPSET_ATTR_CADT_FLAGS, /* 8 */ IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */ + IPSET_ATTR_MARK, /* 10 */ + IPSET_ATTR_MARKMASK, /* 11 */ /* Reserve empty slots */ IPSET_ATTR_CADT_MAX = 16, /* Create-only specific attributes */ @@ -144,6 +146,7 @@ enum ipset_errno { IPSET_ERR_IPADDR_IPV6, IPSET_ERR_COUNTER, IPSET_ERR_COMMENT, + IPSET_ERR_INVALID_MARKMASK, /* Type specific error codes */ IPSET_ERR_TYPE_SPECIFIC = 4352, @@ -182,9 +185,18 @@ enum ipset_cadt_flags { IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS), IPSET_FLAG_BIT_WITH_COMMENT = 4, IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT), + IPSET_FLAG_BIT_WITH_FORCEADD = 5, + IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD), IPSET_FLAG_CADT_MAX = 15, }; +/* The flag bits which correspond to the non-extension create flags */ +enum ipset_create_flags { + IPSET_CREATE_FLAG_BIT_FORCEADD = 0, + IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD), + IPSET_CREATE_FLAG_BIT_MAX = 7, +}; + /* Commands with settype-specific attributes */ enum ipset_adt { IPSET_ADD, diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index 83c985a6170b..c88ccbfda5f1 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1,7 +1,8 @@ #ifndef _LINUX_NF_TABLES_H #define _LINUX_NF_TABLES_H -#define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_CHAIN_MAXNAMELEN 32 +#define NFT_USERDATA_MAXLEN 256 enum nft_registers { NFT_REG_VERDICT, @@ -156,6 +157,7 @@ enum nft_chain_attributes { * @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes) * @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes) * @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64) + * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN) */ enum nft_rule_attributes { NFTA_RULE_UNSPEC, @@ -165,6 +167,7 @@ enum nft_rule_attributes { NFTA_RULE_EXPRESSIONS, NFTA_RULE_COMPAT, NFTA_RULE_POSITION, + NFTA_RULE_USERDATA, __NFTA_RULE_MAX }; #define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1) @@ -601,6 +604,7 @@ enum nft_ct_keys { NFT_CT_PROTOCOL, NFT_CT_PROTO_SRC, NFT_CT_PROTO_DST, + NFT_CT_LABELS, }; /** |