diff options
Diffstat (limited to 'net/ipv4/tcp_metrics.c')
| -rw-r--r-- | net/ipv4/tcp_metrics.c | 400 | 
1 files changed, 387 insertions, 13 deletions
diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c
index 0abe67bb4d3a..4c752a6e0bcd 100644
--- a/net/ipv4/tcp_metrics.c
+++ b/net/ipv4/tcp_metrics.c
@@ -8,6 +8,7 @@
 #include <linux/init.h>
 #include <linux/tcp.h>
 #include <linux/hash.h>
+#include <linux/tcp_metrics.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/net_namespace.h>
@@ -17,20 +18,10 @@
 #include <net/ipv6.h>
 #include <net/dst.h>
 #include <net/tcp.h>
+#include <net/genetlink.h>
 
 int sysctl_tcp_nometrics_save __read_mostly;
 
-enum tcp_metric_index {
-	TCP_METRIC_RTT,
-	TCP_METRIC_RTTVAR,
-	TCP_METRIC_SSTHRESH,
-	TCP_METRIC_CWND,
-	TCP_METRIC_REORDERING,
-
-	/* Always last.  */
-	TCP_METRIC_MAX,
-};
-
 struct tcp_fastopen_metrics {
 	u16	mss;
 	u16	syn_loss:10;		/* Recurring Fast Open SYN losses */
@@ -45,8 +36,10 @@ struct tcp_metrics_block {
 	u32				tcpm_ts;
 	u32				tcpm_ts_stamp;
 	u32				tcpm_lock;
-	u32				tcpm_vals[TCP_METRIC_MAX];
+	u32				tcpm_vals[TCP_METRIC_MAX + 1];
 	struct tcp_fastopen_metrics	tcpm_fastopen;
+
+	struct rcu_head			rcu_head;
 };
 
 static bool tcp_metric_locked(struct tcp_metrics_block *tm,
@@ -690,6 +683,367 @@ void tcp_fastopen_cache_set(struct sock *sk, u16 mss,
 	rcu_read_unlock();
 }
 
+static struct genl_family tcp_metrics_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.hdrsize	= 0,
+	.name		= TCP_METRICS_GENL_NAME,
+	.version	= TCP_METRICS_GENL_VERSION,
+	.maxattr	= TCP_METRICS_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = {
+	[TCP_METRICS_ATTR_ADDR_IPV4]	= { .type = NLA_U32, },
+	[TCP_METRICS_ATTR_ADDR_IPV6]	= { .type = NLA_BINARY,
+					    .len = sizeof(struct in6_addr), },
+	/* Following attributes are not received for GET/DEL,
+	 * we keep them for reference
+	 */
+#if 0
+	[TCP_METRICS_ATTR_AGE]		= { .type = NLA_MSECS, },
+	[TCP_METRICS_ATTR_TW_TSVAL]	= { .type = NLA_U32, },
+	[TCP_METRICS_ATTR_TW_TS_STAMP]	= { .type = NLA_S32, },
+	[TCP_METRICS_ATTR_VALS]		= { .type = NLA_NESTED, },
+	[TCP_METRICS_ATTR_FOPEN_MSS]	= { .type = NLA_U16, },
+	[TCP_METRICS_ATTR_FOPEN_SYN_DROPS]	= { .type = NLA_U16, },
+	[TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS]	= { .type = NLA_MSECS, },
+	[TCP_METRICS_ATTR_FOPEN_COOKIE]	= { .type = NLA_BINARY,
+					    .len = TCP_FASTOPEN_COOKIE_MAX, },
+#endif
+};
+
+/* Add the attributes describing @tm to @msg, caller cancels its header on
+ * failure.
+ *
+ * Returns 0 on success, -EAFNOSUPPORT for an unknown address family and
+ * -EMSGSIZE when an attribute could not be added.
+ */
+static int tcp_metrics_fill_info(struct sk_buff *msg,
+				 struct tcp_metrics_block *tm)
+{
+	struct nlattr *nest;
+	int i;
+
+	switch (tm->tcpm_addr.family) {
+	case AF_INET:
+		if (nla_put_be32(msg, TCP_METRICS_ATTR_ADDR_IPV4,
+				tm->tcpm_addr.addr.a4) < 0)
+			goto nla_put_failure;
+		break;
+	case AF_INET6:
+		if (nla_put(msg, TCP_METRICS_ATTR_ADDR_IPV6, 16,
+			    tm->tcpm_addr.addr.a6) < 0)
+			goto nla_put_failure;
+		break;
+	default:
+		return -EAFNOSUPPORT;
+	}
+
+	if (nla_put_msecs(msg, TCP_METRICS_ATTR_AGE,
+			  jiffies - tm->tcpm_stamp) < 0)
+		goto nla_put_failure;
+	if (tm->tcpm_ts_stamp) {
+		if (nla_put_s32(msg, TCP_METRICS_ATTR_TW_TS_STAMP,
+				(s32) (get_seconds() - tm->tcpm_ts_stamp)) < 0)
+			goto nla_put_failure;
+		if (nla_put_u32(msg, TCP_METRICS_ATTR_TW_TSVAL,
+				tm->tcpm_ts) < 0)
+			goto nla_put_failure;
+	}
+
+	{
+		int n = 0;
+
+		/* Type is metric index + 1, zero metrics are skipped */
+		nest = nla_nest_start(msg, TCP_METRICS_ATTR_VALS);
+		if (!nest)
+			goto nla_put_failure;
+		for (i = 0; i < TCP_METRIC_MAX + 1; i++) {
+			if (!tm->tcpm_vals[i])
+				continue;
+			if (nla_put_u32(msg, i + 1, tm->tcpm_vals[i]) < 0)
+				goto nla_put_failure;
+			n++;
+		}
+		if (n)
+			nla_nest_end(msg, nest);
+		else
+			nla_nest_cancel(msg, nest);
+	}
+
+	{
+		struct tcp_fastopen_metrics tfom_copy[1], *tfom;
+		unsigned int seq;
+
+		/* Get a consistent copy of the Fast Open data */
+		do {
+			seq = read_seqbegin(&fastopen_seqlock);
+			tfom_copy[0] = tm->tcpm_fastopen;
+		} while (read_seqretry(&fastopen_seqlock, seq));
+
+		tfom = tfom_copy;
+		if (tfom->mss &&
+		    nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_MSS,
+				tfom->mss) < 0)
+			goto nla_put_failure;
+		if (tfom->syn_loss &&
+		    (nla_put_u16(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROPS,
+				tfom->syn_loss) < 0 ||
+		     nla_put_msecs(msg, TCP_METRICS_ATTR_FOPEN_SYN_DROP_TS,
+				jiffies - tfom->last_syn_loss) < 0))
+			goto nla_put_failure;
+		if (tfom->cookie.len > 0 &&
+		    nla_put(msg, TCP_METRICS_ATTR_FOPEN_COOKIE,
+			    tfom->cookie.len, tfom->cookie.val) < 0)
+			goto nla_put_failure;
+	}
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* Add one NLM_F_MULTI message describing @tm to the dump in @skb.
+ *
+ * Returns the result of genlmsg_end() on success, -EMSGSIZE when the header
+ * or the attributes could not be added.
+ */
+static int tcp_metrics_dump_info(struct sk_buff *skb,
+				 struct netlink_callback *cb,
+				 struct tcp_metrics_block *tm)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+			  &tcp_metrics_nl_family, NLM_F_MULTI,
+			  TCP_METRICS_CMD_GET);
+	if (!hdr)
+		return -EMSGSIZE;
+
+	if (tcp_metrics_fill_info(skb, tm) < 0)
+		goto nla_put_failure;
+
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+/* Dump all entries of the hash table of the netns.
+ *
+ * cb->args[0] and cb->args[1] hold the bucket and the position within its
+ * chain to continue from when the dump is resumed.
+ */
+static int tcp_metrics_nl_dump(struct sk_buff *skb,
+			       struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+	unsigned int row, s_row = cb->args[0];
+	int s_col = cb->args[1], col = s_col;
+
+	for (row = s_row; row < max_rows; row++, s_col = 0) {
+		struct tcp_metrics_block *tm;
+		struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash + row;
+
+		rcu_read_lock();
+		for (col = 0, tm = rcu_dereference(hb->chain); tm;
+		     tm = rcu_dereference(tm->tcpm_next), col++) {
+			if (col < s_col)
+				continue;
+			if (tcp_metrics_dump_info(skb, cb, tm) < 0) {
+				rcu_read_unlock();
+				goto done;
+			}
+		}
+		rcu_read_unlock();
+	}
+
+done:
+	cb->args[0] = row;
+	cb->args[1] = col;
+	return skb->len;
+}
+
+/* Extract the address from the request attributes.
+ *
+ * Fills @addr and sets @hash to the not yet folded hash of the address.
+ * Returns 0 on success, -EINVAL for an IPv6 attribute of the wrong size and,
+ * if no address attribute is present, 1 when @optional is set or
+ * -EAFNOSUPPORT otherwise.
+ */
+static int parse_nl_addr(struct genl_info *info, struct inetpeer_addr *addr,
+			 unsigned int *hash, int optional)
+{
+	struct nlattr *a;
+
+	a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV4];
+	if (a) {
+		addr->family = AF_INET;
+		addr->addr.a4 = nla_get_be32(a);
+		*hash = (__force unsigned int) addr->addr.a4;
+		return 0;
+	}
+	a = info->attrs[TCP_METRICS_ATTR_ADDR_IPV6];
+	if (a) {
+		/* Must be a complete address, it is copied below */
+		if (nla_len(a) != sizeof(struct in6_addr))
+			return -EINVAL;
+		addr->family = AF_INET6;
+		memcpy(addr->addr.a6, nla_data(a), sizeof(addr->addr.a6));
+		*hash = ipv6_addr_hash((struct in6_addr *) addr->addr.a6);
+		return 0;
+	}
+	return optional ? 1 : -EAFNOSUPPORT;
+}
+
+/* TCP_METRICS_CMD_GET: reply with the entry stored for one address.
+ *
+ * Returns -ESRCH when no entry matches the requested address.
+ */
+static int tcp_metrics_nl_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct tcp_metrics_block *tm;
+	struct inetpeer_addr addr;
+	unsigned int hash;
+	struct sk_buff *msg;
+	struct net *net = genl_info_net(info);
+	void *reply;
+	int ret;
+
+	ret = parse_nl_addr(info, &addr, &hash, 0);
+	if (ret < 0)
+		return ret;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	reply = genlmsg_put_reply(msg, info, &tcp_metrics_nl_family, 0,
+				  info->genlhdr->cmd);
+	if (!reply)
+		goto nla_put_failure;
+
+	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	ret = -ESRCH;
+	rcu_read_lock();
+	for (tm = rcu_dereference(net->ipv4.tcp_metrics_hash[hash].chain); tm;
+	     tm = rcu_dereference(tm->tcpm_next)) {
+		if (addr_same(&tm->tcpm_addr, &addr)) {
+			ret = tcp_metrics_fill_info(msg, tm);
+			break;
+		}
+	}
+	rcu_read_unlock();
+	if (ret < 0)
+		goto out_free;
+
+	genlmsg_end(msg, reply);
+	return genlmsg_reply(msg, info);
+
+nla_put_failure:
+	ret = -EMSGSIZE;
+
+out_free:
+	nlmsg_free(msg);
+	return ret;
+}
+
+/* rcu_dereference_protected() wrappers: deref_locked_genl() expects both the
+ * genl lock and tcp_metrics_lock to be held, deref_genl() only the genl lock.
+ */
+#define deref_locked_genl(p)	\
+	rcu_dereference_protected(p, lockdep_genl_is_held() && \
+				     lockdep_is_held(&tcp_metrics_lock))
+
+#define deref_genl(p)	rcu_dereference_protected(p, lockdep_genl_is_held())
+
+/* Remove all entries of @net.
+ *
+ * Every chain is detached under tcp_metrics_lock and its entries are then
+ * released with kfree_rcu().  Always returns 0.
+ */
+static int tcp_metrics_flush_all(struct net *net)
+{
+	unsigned int max_rows = 1U << net->ipv4.tcp_metrics_hash_log;
+	struct tcpm_hash_bucket *hb = net->ipv4.tcp_metrics_hash;
+	struct tcp_metrics_block *tm;
+	unsigned int row;
+
+	for (row = 0; row < max_rows; row++, hb++) {
+		spin_lock_bh(&tcp_metrics_lock);
+		tm = deref_locked_genl(hb->chain);
+		if (tm)
+			hb->chain = NULL;
+		spin_unlock_bh(&tcp_metrics_lock);
+		while (tm) {
+			struct tcp_metrics_block *next;
+
+			next = deref_genl(tm->tcpm_next);
+			kfree_rcu(tm, rcu_head);
+			tm = next;
+		}
+	}
+	return 0;
+}
+
+/* TCP_METRICS_CMD_DEL: remove the entry of one address or, when the request
+ * carries no address attribute, all entries of the netns.
+ *
+ * Returns -ESRCH when no entry matches the requested address.
+ */
+static int tcp_metrics_nl_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct tcpm_hash_bucket *hb;
+	struct tcp_metrics_block *tm;
+	struct tcp_metrics_block __rcu **pp;
+	struct inetpeer_addr addr;
+	unsigned int hash;
+	struct net *net = genl_info_net(info);
+	int ret;
+
+	ret = parse_nl_addr(info, &addr, &hash, 1);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		return tcp_metrics_flush_all(net);
+
+	hash = hash_32(hash, net->ipv4.tcp_metrics_hash_log);
+	hb = net->ipv4.tcp_metrics_hash + hash;
+	pp = &hb->chain;
+	spin_lock_bh(&tcp_metrics_lock);
+	for (tm = deref_locked_genl(*pp); tm;
+	     pp = &tm->tcpm_next, tm = deref_locked_genl(*pp)) {
+		if (addr_same(&tm->tcpm_addr, &addr)) {
+			*pp = tm->tcpm_next;
+			break;
+		}
+	}
+	spin_unlock_bh(&tcp_metrics_lock);
+	if (!tm)
+		return -ESRCH;
+	kfree_rcu(tm, rcu_head);
+	return 0;
+}
+
+static struct genl_ops tcp_metrics_nl_ops[] = {
+	{
+		.cmd = TCP_METRICS_CMD_GET,
+		.doit = tcp_metrics_nl_cmd_get,
+		.dumpit = tcp_metrics_nl_dump,
+		.policy = tcp_metrics_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TCP_METRICS_CMD_DEL,
+		.doit = tcp_metrics_nl_cmd_del,
+		.policy = tcp_metrics_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
 static unsigned int tcpmhash_entries;
 static int __init set_tcpmhash_entries(char *str)
 {
@@ -753,5 +1107,25 @@ static __net_initdata struct pernet_operations tcp_net_metrics_ops = {
 
+/* Register the pernet operations and the generic netlink family.  The
+ * pernet operations are unregistered again if the family cannot be
+ * registered; failures are not reported to the caller.
+ */
 void __init tcp_metrics_init(void)
 {
-	register_pernet_subsys(&tcp_net_metrics_ops);
+	int ret;
+
+	ret = register_pernet_subsys(&tcp_net_metrics_ops);
+	if (ret < 0)
+		goto cleanup;
+	ret = genl_register_family_with_ops(&tcp_metrics_nl_family,
+					    tcp_metrics_nl_ops,
+					    ARRAY_SIZE(tcp_metrics_nl_ops));
+	if (ret < 0)
+		goto cleanup_subsys;
+	return;
+
+cleanup_subsys:
+	unregister_pernet_subsys(&tcp_net_metrics_ops);
+
+cleanup:
+	return;
 }
