summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2014-03-17 15:06:24 -0400
committerDavid S. Miller <davem@davemloft.net>2014-03-17 15:06:24 -0400
commite86e180b824e00733bd0e499d412a595078f9b51 (patch)
treeebda350b99785b4d0dd0188dd28fa17ec8135474 /include
parente7ef085d0a9dc1cc72e7d8108ed3b4e1a5e8d938 (diff)
parent7d08487777c8b30dea34790734d708470faaf1e5 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf-next
Pablo Neira Ayuso says: ==================== Netfilter/IPVS updates for net-next The following patchset contains Netfilter/IPVS updates for net-next, most relevantly they are: * cleanup to remove double semicolon from stephen hemminger. * calm down sparse warning in xt_ipcomp, from Fan Du. * nf_ct_labels support for nf_tables, from Florian Westphal. * new macros to simplify rcu dereferences in the scope of nfnetlink and nf_tables, from Patrick McHardy. * Accept queue and drop (including reason for drop) to verdict parsing in nf_tables, also from Patrick. * Remove unused random seed initialization in nfnetlink_log, from Florian Westphal. * Allow to attach user-specific information to nf_tables rules, useful to attach user comments to rule, from me. * Return errors in ipset according to the manpage documentation, from Jozsef Kadlecsik. * Fix coccinelle warnings related to incorrect bool type usage for ipset, from Fengguang Wu. * Add hash:ip,mark set type to ipset, from Vytas Dauksa. * Fix message for each spotted by ipset for each netns that is created, from Ilia Mirkin. * Add forceadd option to ipset, which evicts a random entry from the set if it becomes full, from Josh Hunt. * Minor IPVS cleanups and fixes from Andi Kleen and Tingwei Liu. * Improve conntrack scalability by removing a central spinlock, original work from Eric Dumazet. Jesper Dangaard Brouer took them over to address remaining issues. Several patches to prepare this change come in first place. * Rework nft_hash to resolve bugs (leaking chain, missing rcu synchronization on element removal, etc. from Patrick McHardy. * Restore context in the rule deletion path, as we now release rule objects synchronously, from Patrick McHardy. This gets back event notification for anonymous sets. * Fix NAT family validation in nft_nat, also from Patrick. * Improve scalability of xt_connlimit by using an array of spinlocks and by introducing a rb-tree of hashtables for faster lookup of accounted objects per network. This patch was preceded by several patches and refactorizations to accomodate this change including the use of kmem_cache, from Florian Westphal. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r--include/linux/netfilter/ipset/ip_set.h15
-rw-r--r--include/linux/netfilter/nfnetlink.h21
-rw-r--r--include/net/netfilter/nf_conntrack.h11
-rw-r--r--include/net/netfilter/nf_conntrack_core.h9
-rw-r--r--include/net/netfilter/nf_conntrack_labels.h4
-rw-r--r--include/net/netfilter/nf_tables.h28
-rw-r--r--include/net/netns/conntrack.h13
-rw-r--r--include/uapi/linux/netfilter/ipset/ip_set.h12
-rw-r--r--include/uapi/linux/netfilter/nf_tables.h6
9 files changed, 96 insertions, 23 deletions
diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 0c7d01eae56c..96afc29184be 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -39,11 +39,13 @@ enum ip_set_feature {
IPSET_TYPE_NAME = (1 << IPSET_TYPE_NAME_FLAG),
IPSET_TYPE_IFACE_FLAG = 5,
IPSET_TYPE_IFACE = (1 << IPSET_TYPE_IFACE_FLAG),
- IPSET_TYPE_NOMATCH_FLAG = 6,
+ IPSET_TYPE_MARK_FLAG = 6,
+ IPSET_TYPE_MARK = (1 << IPSET_TYPE_MARK_FLAG),
+ IPSET_TYPE_NOMATCH_FLAG = 7,
IPSET_TYPE_NOMATCH = (1 << IPSET_TYPE_NOMATCH_FLAG),
/* Strictly speaking not a feature, but a flag for dumping:
* this settype must be dumped last */
- IPSET_DUMP_LAST_FLAG = 7,
+ IPSET_DUMP_LAST_FLAG = 8,
IPSET_DUMP_LAST = (1 << IPSET_DUMP_LAST_FLAG),
};
@@ -63,6 +65,7 @@ enum ip_set_extension {
#define SET_WITH_TIMEOUT(s) ((s)->extensions & IPSET_EXT_TIMEOUT)
#define SET_WITH_COUNTER(s) ((s)->extensions & IPSET_EXT_COUNTER)
#define SET_WITH_COMMENT(s) ((s)->extensions & IPSET_EXT_COMMENT)
+#define SET_WITH_FORCEADD(s) ((s)->flags & IPSET_CREATE_FLAG_FORCEADD)
/* Extension id, in size order */
enum ip_set_ext_id {
@@ -171,8 +174,6 @@ struct ip_set_type {
char name[IPSET_MAXNAMELEN];
/* Protocol version */
u8 protocol;
- /* Set features to control swapping */
- u8 features;
/* Set type dimension */
u8 dimension;
/*
@@ -182,6 +183,8 @@ struct ip_set_type {
u8 family;
/* Type revisions */
u8 revision_min, revision_max;
+ /* Set features to control swapping */
+ u16 features;
/* Create set */
int (*create)(struct net *net, struct ip_set *set,
@@ -217,6 +220,8 @@ struct ip_set {
u8 revision;
/* Extensions */
u8 extensions;
+ /* Create flags */
+ u8 flags;
/* Default timeout value, if enabled */
u32 timeout;
/* Element data size */
@@ -251,6 +256,8 @@ ip_set_put_flags(struct sk_buff *skb, struct ip_set *set)
cadt_flags |= IPSET_FLAG_WITH_COUNTERS;
if (SET_WITH_COMMENT(set))
cadt_flags |= IPSET_FLAG_WITH_COMMENT;
+ if (SET_WITH_FORCEADD(set))
+ cadt_flags |= IPSET_FLAG_WITH_FORCEADD;
if (!cadt_flags)
return 0;
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 28c74367e900..e955d4730625 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -44,6 +44,27 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid,
void nfnl_lock(__u8 subsys_id);
void nfnl_unlock(__u8 subsys_id);
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_nfnl_is_held(__u8 subsys_id);
+#else
+static inline int lockdep_nfnl_is_held(__u8 subsys_id)
+{
+ return 1;
+}
+#endif /* CONFIG_PROVE_LOCKING */
+
+/*
+ * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys mutex
+ *
+ * @p: The pointer to read, prior to dereferencing
+ * @ss: The nfnetlink subsystem ID
+ *
+ * Return the value of the specified RCU-protected pointer, but omit
+ * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ * caller holds the NFNL subsystem mutex.
+ */
+#define nfnl_dereference(p, ss) \
+ rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index b2ac6246b7e0..37252f71a380 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -73,10 +73,17 @@ struct nf_conn_help {
struct nf_conn {
/* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
- plus 1 for any connection(s) we are `master' for */
+ * plus 1 for any connection(s) we are `master' for
+ *
+ * Hint, SKB address this struct and refcnt via skb->nfct and
+ * helpers nf_conntrack_get() and nf_conntrack_put().
+ * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt,
+ * beware nf_ct_get() is different and don't inc refcnt.
+ */
struct nf_conntrack ct_general;
- spinlock_t lock;
+ spinlock_t lock;
+ u16 cpu;
/* XXX should I move this to the tail ? - Y.K */
/* These are my tuples; original and reply */
diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h
index 15308b8eb5b5..cc0c18827602 100644
--- a/include/net/netfilter/nf_conntrack_core.h
+++ b/include/net/netfilter/nf_conntrack_core.h
@@ -77,6 +77,13 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_l3proto *l3proto,
const struct nf_conntrack_l4proto *proto);
-extern spinlock_t nf_conntrack_lock ;
+#ifdef CONFIG_LOCKDEP
+# define CONNTRACK_LOCKS 8
+#else
+# define CONNTRACK_LOCKS 1024
+#endif
+extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS];
+
+extern spinlock_t nf_conntrack_expect_lock;
#endif /* _NF_CONNTRACK_CORE_H */
diff --git a/include/net/netfilter/nf_conntrack_labels.h b/include/net/netfilter/nf_conntrack_labels.h
index c985695283b3..dec6336bf850 100644
--- a/include/net/netfilter/nf_conntrack_labels.h
+++ b/include/net/netfilter/nf_conntrack_labels.h
@@ -7,6 +7,8 @@
#include <uapi/linux/netfilter/xt_connlabel.h>
+#define NF_CT_LABELS_MAX_SIZE ((XT_CONNLABEL_MAXBIT + 1) / BITS_PER_BYTE)
+
struct nf_conn_labels {
u8 words;
unsigned long bits[];
@@ -29,7 +31,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
u8 words;
words = ACCESS_ONCE(net->ct.label_words);
- if (words == 0 || WARN_ON_ONCE(words > 8))
+ if (words == 0)
return NULL;
cl_ext = nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e7e14ffe0f6a..e6bc14d8fa9a 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -3,6 +3,7 @@
#include <linux/list.h>
#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netlink.h>
@@ -288,7 +289,8 @@ struct nft_expr_ops {
int (*init)(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nlattr * const tb[]);
- void (*destroy)(const struct nft_expr *expr);
+ void (*destroy)(const struct nft_ctx *ctx,
+ const struct nft_expr *expr);
int (*dump)(struct sk_buff *skb,
const struct nft_expr *expr);
int (*validate)(const struct nft_ctx *ctx,
@@ -325,13 +327,15 @@ static inline void *nft_expr_priv(const struct nft_expr *expr)
* @handle: rule handle
* @genmask: generation mask
* @dlen: length of expression data
+ * @ulen: length of user data (used for comments)
* @data: expression data
*/
struct nft_rule {
struct list_head list;
- u64 handle:46,
+ u64 handle:42,
genmask:2,
- dlen:16;
+ dlen:12,
+ ulen:8;
unsigned char data[]
__attribute__((aligned(__alignof__(struct nft_expr))));
};
@@ -340,19 +344,13 @@ struct nft_rule {
* struct nft_rule_trans - nf_tables rule update in transaction
*
* @list: used internally
+ * @ctx: rule context
* @rule: rule that needs to be updated
- * @chain: chain that this rule belongs to
- * @table: table for which this chain applies
- * @nlh: netlink header of the message that contain this update
- * @family: family expressesed as AF_*
*/
struct nft_rule_trans {
struct list_head list;
+ struct nft_ctx ctx;
struct nft_rule *rule;
- const struct nft_chain *chain;
- const struct nft_table *table;
- const struct nlmsghdr *nlh;
- u8 family;
};
static inline struct nft_expr *nft_expr_first(const struct nft_rule *rule)
@@ -370,6 +368,11 @@ static inline struct nft_expr *nft_expr_last(const struct nft_rule *rule)
return (struct nft_expr *)&rule->data[rule->dlen];
}
+static inline void *nft_userdata(const struct nft_rule *rule)
+{
+ return (void *)&rule->data[rule->dlen];
+}
+
/*
* The last pointer isn't really necessary, but the compiler isn't able to
* determine that the result of nft_expr_last() is always the same since it
@@ -521,6 +524,9 @@ void nft_unregister_chain_type(const struct nf_chain_type *);
int nft_register_expr(struct nft_expr_type *);
void nft_unregister_expr(struct nft_expr_type *);
+#define nft_dereference(p) \
+ nfnl_dereference(p, NFNL_SUBSYS_NFTABLES)
+
#define MODULE_ALIAS_NFT_FAMILY(family) \
MODULE_ALIAS("nft-afinfo-" __stringify(family))
diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h
index fbcc7fa536dc..773cce308bc6 100644
--- a/include/net/netns/conntrack.h
+++ b/include/net/netns/conntrack.h
@@ -5,6 +5,7 @@
#include <linux/list_nulls.h>
#include <linux/atomic.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
+#include <linux/seqlock.h>
struct ctl_table_header;
struct nf_conntrack_ecache;
@@ -62,6 +63,13 @@ struct nf_ip_net {
#endif
};
+struct ct_pcpu {
+ spinlock_t lock;
+ struct hlist_nulls_head unconfirmed;
+ struct hlist_nulls_head dying;
+ struct hlist_nulls_head tmpl;
+};
+
struct netns_ct {
atomic_t count;
unsigned int expect_count;
@@ -83,12 +91,11 @@ struct netns_ct {
int sysctl_checksum;
unsigned int htable_size;
+ seqcount_t generation;
struct kmem_cache *nf_conntrack_cachep;
struct hlist_nulls_head *hash;
struct hlist_head *expect_hash;
- struct hlist_nulls_head unconfirmed;
- struct hlist_nulls_head dying;
- struct hlist_nulls_head tmpl;
+ struct ct_pcpu __percpu *pcpu_lists;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 25d3b2f79c02..78c2f2e79920 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -82,6 +82,8 @@ enum {
IPSET_ATTR_PROTO, /* 7 */
IPSET_ATTR_CADT_FLAGS, /* 8 */
IPSET_ATTR_CADT_LINENO = IPSET_ATTR_LINENO, /* 9 */
+ IPSET_ATTR_MARK, /* 10 */
+ IPSET_ATTR_MARKMASK, /* 11 */
/* Reserve empty slots */
IPSET_ATTR_CADT_MAX = 16,
/* Create-only specific attributes */
@@ -144,6 +146,7 @@ enum ipset_errno {
IPSET_ERR_IPADDR_IPV6,
IPSET_ERR_COUNTER,
IPSET_ERR_COMMENT,
+ IPSET_ERR_INVALID_MARKMASK,
/* Type specific error codes */
IPSET_ERR_TYPE_SPECIFIC = 4352,
@@ -182,9 +185,18 @@ enum ipset_cadt_flags {
IPSET_FLAG_WITH_COUNTERS = (1 << IPSET_FLAG_BIT_WITH_COUNTERS),
IPSET_FLAG_BIT_WITH_COMMENT = 4,
IPSET_FLAG_WITH_COMMENT = (1 << IPSET_FLAG_BIT_WITH_COMMENT),
+ IPSET_FLAG_BIT_WITH_FORCEADD = 5,
+ IPSET_FLAG_WITH_FORCEADD = (1 << IPSET_FLAG_BIT_WITH_FORCEADD),
IPSET_FLAG_CADT_MAX = 15,
};
+/* The flag bits which correspond to the non-extension create flags */
+enum ipset_create_flags {
+ IPSET_CREATE_FLAG_BIT_FORCEADD = 0,
+ IPSET_CREATE_FLAG_FORCEADD = (1 << IPSET_CREATE_FLAG_BIT_FORCEADD),
+ IPSET_CREATE_FLAG_BIT_MAX = 7,
+};
+
/* Commands with settype-specific attributes */
enum ipset_adt {
IPSET_ADD,
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 83c985a6170b..c88ccbfda5f1 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,7 +1,8 @@
#ifndef _LINUX_NF_TABLES_H
#define _LINUX_NF_TABLES_H
-#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_CHAIN_MAXNAMELEN 32
+#define NFT_USERDATA_MAXLEN 256
enum nft_registers {
NFT_REG_VERDICT,
@@ -156,6 +157,7 @@ enum nft_chain_attributes {
* @NFTA_RULE_EXPRESSIONS: list of expressions (NLA_NESTED: nft_expr_attributes)
* @NFTA_RULE_COMPAT: compatibility specifications of the rule (NLA_NESTED: nft_rule_compat_attributes)
* @NFTA_RULE_POSITION: numeric handle of the previous rule (NLA_U64)
+ * @NFTA_RULE_USERDATA: user data (NLA_BINARY, NFT_USERDATA_MAXLEN)
*/
enum nft_rule_attributes {
NFTA_RULE_UNSPEC,
@@ -165,6 +167,7 @@ enum nft_rule_attributes {
NFTA_RULE_EXPRESSIONS,
NFTA_RULE_COMPAT,
NFTA_RULE_POSITION,
+ NFTA_RULE_USERDATA,
__NFTA_RULE_MAX
};
#define NFTA_RULE_MAX (__NFTA_RULE_MAX - 1)
@@ -601,6 +604,7 @@ enum nft_ct_keys {
NFT_CT_PROTOCOL,
NFT_CT_PROTO_SRC,
NFT_CT_PROTO_DST,
+ NFT_CT_LABELS,
};
/**