diff options
author | David S. Miller <davem@davemloft.net> | 2019-05-30 13:41:26 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-05-30 13:41:26 -0700 |
commit | d48ecb40b5b5a156ae3658dc336bfc29ea502eb6 (patch) | |
tree | 0d30a02c064add3df6acf89bfda325e2eff610bf | |
parent | 5b5d331a2c933268eae8da85df43279878392772 (diff) | |
parent | 10fbcdd12aa24fdb4b357a39abc03686d15f04fd (diff) |
Merge branch 'add-TFO-backup-key'
Jason Baron says:
====================
add TFO backup key
Christoph, Igor, and I have worked on an API that facilitates TFO key
rotation. This is a follow up to the series that Christoph previously
posted, with an API that meets both of our use-cases. Here's a
link to the previous work:
https://patchwork.ozlabs.org/cover/1013753/
Changes in v2:
-spelling fixes in ip-sysctl.txt (Jeremy Sowden)
-re-base to latest net-next
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 20 | ||||
-rw-r--r-- | include/net/tcp.h | 41 | ||||
-rw-r--r-- | include/uapi/linux/snmp.h | 1 | ||||
-rw-r--r-- | net/ipv4/proc.c | 1 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 93 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 29 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 233 | ||||
-rw-r--r-- | tools/testing/selftests/net/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/Makefile | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/tcp_fastopen_backup_key.c | 336 | ||||
-rwxr-xr-x | tools/testing/selftests/net/tcp_fastopen_backup_key.sh | 55 |
11 files changed, 694 insertions, 118 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 14fe93049d28..a73b3a02e49a 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -648,6 +648,26 @@ tcp_fastopen_blackhole_timeout_sec - INTEGER 0 to disable the blackhole detection. By default, it is set to 1hr. +tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs + The list consists of a primary key and an optional backup key. The + primary key is used for both creating and validating cookies, while the + optional backup key is only used for validating cookies. The purpose of + the backup key is to maximize TFO validation when keys are rotated. + + A randomly chosen primary key may be configured by the kernel if + the tcp_fastopen sysctl is set to 0x400 (see above), or if the + TCP_FASTOPEN setsockopt() optname is set and a key has not been + previously configured via sysctl. If keys are configured via + setsockopt() by using the TCP_FASTOPEN_KEY optname, then those + per-socket keys will be used instead of any keys that are specified via + sysctl. + + A key is specified as 4 8-digit hexadecimal integers which are separated + by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be + omitted. A primary and a backup key may be specified by separating them + by a comma. If only one key is specified, it becomes the primary key and + any previously configured backup keys are removed. + tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt will be retransmitted. Should not be higher than 127. Default value diff --git a/include/net/tcp.h b/include/net/tcp.h index 985aa5db570c..0083a14fb64f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1614,7 +1614,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp); void tcp_fastopen_destroy_cipher(struct sock *sk); void tcp_fastopen_ctx_destroy(struct net *net); int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *key, unsigned int len); + void *primary_key, void *backup_key, + unsigned int len); void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb); struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -1625,11 +1626,14 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie); bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH 16 +#define TCP_FASTOPEN_KEY_MAX 2 +#define TCP_FASTOPEN_KEY_BUF_LENGTH \ + (TCP_FASTOPEN_KEY_LENGTH * TCP_FASTOPEN_KEY_MAX) /* Fastopen key context */ struct tcp_fastopen_context { - struct crypto_cipher *tfm; - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct crypto_cipher *tfm[TCP_FASTOPEN_KEY_MAX]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; struct rcu_head rcu; }; @@ -1639,6 +1643,37 @@ bool tcp_fastopen_active_should_disable(struct sock *sk); void tcp_fastopen_active_disable_ofo_check(struct sock *sk); void tcp_fastopen_active_detect_blackhole(struct sock *sk, bool expired); +/* Caller needs to wrap with rcu_read_(un)lock() */ +static inline +struct tcp_fastopen_context *tcp_fastopen_get_ctx(const struct sock *sk) +{ + struct tcp_fastopen_context *ctx; + + ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); + if (!ctx) + ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + return ctx; +} + +static inline +bool tcp_fastopen_cookie_match(const struct tcp_fastopen_cookie *foc, + const struct tcp_fastopen_cookie *orig) +{ + if (orig->len == TCP_FASTOPEN_COOKIE_SIZE && + orig->len == foc->len && + !memcmp(orig->val, foc->val, foc->len)) + return true; + return false; +} + +static inline +int tcp_fastopen_context_len(const struct tcp_fastopen_context *ctx) +{ + if (ctx->tfm[1]) + return 2; + return 1; +} + /* Latencies incurred by various limits for a sender. They are * chronograph-like stats that are mutually exclusive. */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 86dc24a96c90..74904e9d1b72 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -283,6 +283,7 @@ enum LINUX_MIB_TCPACKCOMPRESSED, /* TCPAckCompressed */ LINUX_MIB_TCPZEROWINDOWDROP, /* TCPZeroWindowDrop */ LINUX_MIB_TCPRCVQDROP, /* TCPRcvQDrop */ + LINUX_MIB_TCPFASTOPENPASSIVEALTKEY, /* TCPFastOpenPassiveAltKey */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index b613572c6616..4746f963c439 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -291,6 +291,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPAckCompressed", LINUX_MIB_TCPACKCOMPRESSED), SNMP_MIB_ITEM("TCPZeroWindowDrop", LINUX_MIB_TCPZEROWINDOWDROP), SNMP_MIB_ITEM("TCPRcvQDrop", LINUX_MIB_TCPRCVQDROP), + SNMP_MIB_ITEM("TCPFastOpenPassiveAltKey", LINUX_MIB_TCPFASTOPENPASSIVEALTKEY), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 875867b64d6a..90f09e47198b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -277,55 +277,97 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } +static int sscanf_key(char *buf, __le32 *key) +{ + u32 user_key[4]; + int i, ret = 0; + + if (sscanf(buf, "%x-%x-%x-%x", user_key, user_key + 1, + user_key + 2, user_key + 3) != 4) { + ret = -EINVAL; + } else { + for (i = 0; i < ARRAY_SIZE(user_key); i++) + key[i] = cpu_to_le32(user_key[i]); + } + pr_debug("proc TFO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", + user_key[0], user_key[1], user_key[2], user_key[3], buf, ret); + + return ret; +} + static int proc_tcp_fastopen_key(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, ipv4.sysctl_tcp_fastopen); - struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; - struct tcp_fastopen_context *ctxt; - u32 user_key[4]; /* 16 bytes, matching TCP_FASTOPEN_KEY_LENGTH */ - __le32 key[4]; - int ret, i; + /* maxlen to print the list of keys in hex (*2), with dashes + * separating doublewords and a comma in between keys. + */ + struct ctl_table tbl = { .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * + 2 * TCP_FASTOPEN_KEY_MAX) + + (TCP_FASTOPEN_KEY_MAX * 5)) }; + struct tcp_fastopen_context *ctx; + u32 user_key[TCP_FASTOPEN_KEY_MAX * 4]; + __le32 key[TCP_FASTOPEN_KEY_MAX * 4]; + char *backup_data; + int ret, i = 0, off = 0, n_keys = 0; tbl.data = kmalloc(tbl.maxlen, GFP_KERNEL); if (!tbl.data) return -ENOMEM; rcu_read_lock(); - ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); - if (ctxt) - memcpy(key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); - else - memset(key, 0, sizeof(key)); + ctx = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctx) { + n_keys = tcp_fastopen_context_len(ctx); + memcpy(&key[0], &ctx->key[0], TCP_FASTOPEN_KEY_LENGTH * n_keys); + } rcu_read_unlock(); - for (i = 0; i < ARRAY_SIZE(key); i++) + if (!n_keys) { + memset(&key[0], 0, TCP_FASTOPEN_KEY_LENGTH); + n_keys = 1; + } + + for (i = 0; i < n_keys * 4; i++) user_key[i] = le32_to_cpu(key[i]); - snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", - user_key[0], user_key[1], user_key[2], user_key[3]); + for (i = 0; i < n_keys; i++) { + off += snprintf(tbl.data + off, tbl.maxlen - off, + "%08x-%08x-%08x-%08x", + user_key[i * 4], + user_key[i * 4 + 1], + user_key[i * 4 + 2], + user_key[i * 4 + 3]); + if (i + 1 < n_keys) + off += snprintf(tbl.data + off, tbl.maxlen - off, ","); + } + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) { - if (sscanf(tbl.data, "%x-%x-%x-%x", user_key, user_key + 1, - user_key + 2, user_key + 3) != 4) { + backup_data = strchr(tbl.data, ','); + if (backup_data) { + *backup_data = '\0'; + backup_data++; + } + if (sscanf_key(tbl.data, key)) { ret = -EINVAL; goto bad_key; } - - for (i = 0; i < ARRAY_SIZE(user_key); i++) - key[i] = cpu_to_le32(user_key[i]); - + if (backup_data) { + if (sscanf_key(backup_data, key + 4)) { + ret = -EINVAL; + goto bad_key; + } + } tcp_fastopen_reset_cipher(net, NULL, key, + backup_data ? key + 4 : NULL, TCP_FASTOPEN_KEY_LENGTH); } bad_key: - pr_debug("proc FO key set 0x%x-%x-%x-%x <- 0x%s: %u\n", - user_key[0], user_key[1], user_key[2], user_key[3], - (char *)tbl.data, ret); kfree(tbl.data); return ret; } @@ -933,7 +975,12 @@ static struct ctl_table ipv4_net_table[] = { .procname = "tcp_fastopen_key", .mode = 0600, .data = &init_net.ipv4.sysctl_tcp_fastopen, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + /* maxlen to print the list of keys in hex (*2), with dashes + * separating doublewords and a comma in between keys. + */ + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * + 2 * TCP_FASTOPEN_KEY_MAX) + + (TCP_FASTOPEN_KEY_MAX * 5)), .proc_handler = proc_tcp_fastopen_key, }, { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 53d61ca3ac4b..27ce13ece510 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2790,15 +2790,24 @@ static int do_tcp_setsockopt(struct sock *sk, int level, return err; } case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; + __u8 *backup_key = NULL; - if (optlen != sizeof(key)) + /* Allow a backup key as well to facilitate key rotation + * First key is the active one. + */ + if (optlen != TCP_FASTOPEN_KEY_LENGTH && + optlen != TCP_FASTOPEN_KEY_BUF_LENGTH) return -EINVAL; if (copy_from_user(key, optval, optlen)) return -EFAULT; - return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key)); + if (optlen == TCP_FASTOPEN_KEY_BUF_LENGTH) + backup_key = key + TCP_FASTOPEN_KEY_LENGTH; + + return tcp_fastopen_reset_cipher(net, sk, key, backup_key, + TCP_FASTOPEN_KEY_LENGTH); } default: /* fallthru */ @@ -3452,21 +3461,23 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return 0; case TCP_FASTOPEN_KEY: { - __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + __u8 key[TCP_FASTOPEN_KEY_BUF_LENGTH]; struct tcp_fastopen_context *ctx; + unsigned int key_len = 0; if (get_user(len, optlen)) return -EFAULT; rcu_read_lock(); ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); - if (ctx) - memcpy(key, ctx->key, sizeof(key)); - else - len = 0; + if (ctx) { + key_len = tcp_fastopen_context_len(ctx) * + TCP_FASTOPEN_KEY_LENGTH; + memcpy(&key[0], &ctx->key[0], key_len); + } rcu_read_unlock(); - len = min_t(unsigned int, len, sizeof(key)); + len = min_t(unsigned int, len, key_len); if (put_user(len, optlen)) return -EFAULT; if (copy_to_user(optval, key, len)) diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 018a48477355..8e1580485c9e 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -30,14 +30,20 @@ void tcp_fastopen_init_key_once(struct net *net) * for a valid cookie, so this is an acceptable risk. */ get_random_bytes(key, sizeof(key)); - tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key)); + tcp_fastopen_reset_cipher(net, NULL, key, NULL, sizeof(key)); } static void tcp_fastopen_ctx_free(struct rcu_head *head) { struct tcp_fastopen_context *ctx = container_of(head, struct tcp_fastopen_context, rcu); - crypto_free_cipher(ctx->tfm); + int i; + + /* We own ctx, thus no need to hold the Fastopen-lock */ + for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) { + if (ctx->tfm[i]) + crypto_free_cipher(ctx->tfm[i]); + } kfree(ctx); } @@ -66,33 +72,54 @@ void tcp_fastopen_ctx_destroy(struct net *net) call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); } +struct tcp_fastopen_context *tcp_fastopen_alloc_ctx(void *primary_key, + void *backup_key, + unsigned int len) +{ + struct tcp_fastopen_context *new_ctx; + void *key = primary_key; + int err, i; + + new_ctx = kmalloc(sizeof(*new_ctx), GFP_KERNEL); + if (!new_ctx) + return ERR_PTR(-ENOMEM); + for (i = 0; i < TCP_FASTOPEN_KEY_MAX; i++) + new_ctx->tfm[i] = NULL; + for (i = 0; i < (backup_key ? 2 : 1); i++) { + new_ctx->tfm[i] = crypto_alloc_cipher("aes", 0, 0); + if (IS_ERR(new_ctx->tfm[i])) { + err = PTR_ERR(new_ctx->tfm[i]); + new_ctx->tfm[i] = NULL; + pr_err("TCP: TFO aes cipher alloc error: %d\n", err); + goto out; + } + err = crypto_cipher_setkey(new_ctx->tfm[i], key, len); + if (err) { + pr_err("TCP: TFO cipher key error: %d\n", err); + goto out; + } + memcpy(&new_ctx->key[i * TCP_FASTOPEN_KEY_LENGTH], key, len); + key = backup_key; + } + return new_ctx; +out: + tcp_fastopen_ctx_free(&new_ctx->rcu); + return ERR_PTR(err); +} + int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, - void *key, unsigned int len) + void *primary_key, void *backup_key, + unsigned int len) { struct tcp_fastopen_context *ctx, *octx; struct fastopen_queue *q; - int err; + int err = 0; - ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) - return -ENOMEM; - ctx->tfm = crypto_alloc_cipher("aes", 0, 0); - - if (IS_ERR(ctx->tfm)) { - err = PTR_ERR(ctx->tfm); -error: kfree(ctx); - pr_err("TCP: TFO aes cipher alloc error: %d\n", err); - return err; + ctx = tcp_fastopen_alloc_ctx(primary_key, backup_key, len); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; } - err = crypto_cipher_setkey(ctx->tfm, key, len); - if (err) { - pr_err("TCP: TFO cipher key error: %d\n", err); - crypto_free_cipher(ctx->tfm); - goto error; - } - memcpy(ctx->key, key, len); - - spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); if (sk) { q = &inet_csk(sk)->icsk_accept_queue.fastopenq; @@ -108,28 +135,42 @@ error: kfree(ctx); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); +out: return err; } -static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, - struct tcp_fastopen_cookie *foc) +static bool __tcp_fastopen_cookie_gen_cipher(struct request_sock *req, + struct sk_buff *syn, + struct crypto_cipher *tfm, + struct tcp_fastopen_cookie *foc) { - struct tcp_fastopen_context *ctx; - bool ok = false; - - rcu_read_lock(); + if (req->rsk_ops->family == AF_INET) { + const struct iphdr *iph = ip_hdr(syn); + __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); - if (!ctx) - ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + crypto_cipher_encrypt_one(tfm, foc->val, (void *)path); + foc->len = TCP_FASTOPEN_COOKIE_SIZE; + return true; + } - if (ctx) { - crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); +#if IS_ENABLED(CONFIG_IPV6) + if (req->rsk_ops->family == AF_INET6) { + const struct ipv6hdr *ip6h = ipv6_hdr(syn); + struct tcp_fastopen_cookie tmp; + struct in6_addr *buf; + int i; + + crypto_cipher_encrypt_one(tfm, tmp.val, + (void *)&ip6h->saddr); + buf = &tmp.addr; + for (i = 0; i < 4; i++) + buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; + crypto_cipher_encrypt_one(tfm, foc->val, (void *)buf); foc->len = TCP_FASTOPEN_COOKIE_SIZE; - ok = true; + return true; } - rcu_read_unlock(); - return ok; +#endif + return false; } /* Generate the fastopen cookie by doing aes128 encryption on both @@ -138,37 +179,20 @@ static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, * * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. */ -static bool tcp_fastopen_cookie_gen(struct sock *sk, +static void tcp_fastopen_cookie_gen(struct sock *sk, struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) { - if (req->rsk_ops->family == AF_INET) { - const struct iphdr *iph = ip_hdr(syn); - - __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - return __tcp_fastopen_cookie_gen(sk, path, foc); - } - -#if IS_ENABLED(CONFIG_IPV6) - if (req->rsk_ops->family == AF_INET6) { - const struct ipv6hdr *ip6h = ipv6_hdr(syn); - struct tcp_fastopen_cookie tmp; - - if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) { - struct in6_addr *buf = &tmp.addr; - int i; + struct tcp_fastopen_context *ctx; - for (i = 0; i < 4; i++) - buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - return __tcp_fastopen_cookie_gen(sk, buf, foc); - } - } -#endif - return false; + rcu_read_lock(); + ctx = tcp_fastopen_get_ctx(sk); + if (ctx) + __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[0], foc); + rcu_read_unlock(); } - /* If an incoming SYN or SYNACK frame contains a payload and/or FIN, * queue this additional data / FIN. */ @@ -212,6 +236,35 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) tcp_fin(sk); } +/* returns 0 - no key match, 1 for primary, 2 for backup */ +static int tcp_fastopen_cookie_gen_check(struct sock *sk, + struct request_sock *req, + struct sk_buff *syn, + struct tcp_fastopen_cookie *orig, + struct tcp_fastopen_cookie *valid_foc) +{ + struct tcp_fastopen_cookie search_foc = { .len = -1 }; + struct tcp_fastopen_cookie *foc = valid_foc; + struct tcp_fastopen_context *ctx; + int i, ret = 0; + + rcu_read_lock(); + ctx = tcp_fastopen_get_ctx(sk); + if (!ctx) + goto out; + for (i = 0; i < tcp_fastopen_context_len(ctx); i++) { + __tcp_fastopen_cookie_gen_cipher(req, syn, ctx->tfm[i], foc); + if (tcp_fastopen_cookie_match(foc, orig)) { + ret = i + 1; + goto out; + } + foc = &search_foc; + } +out: + rcu_read_unlock(); + return ret; +} + static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -331,6 +384,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; + int ret = 0; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); @@ -346,31 +400,44 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; - if (foc->len >= 0 && /* Client presents or requests a cookie */ - tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) && - foc->len == TCP_FASTOPEN_COOKIE_SIZE && - foc->len == valid_foc.len && - !memcmp(foc->val, valid_foc.val, foc->len)) { - /* Cookie is valid. Create a (full) child socket to accept - * the data in SYN before returning a SYN-ACK to ack the - * data. If we fail to create the socket, fall back and - * ack the ISN only but includes the same cookie. - * - * Note: Data-less SYN with valid cookie is allowed to send - * data in SYN_RECV state. - */ + if (foc->len == 0) { + /* Client requests a cookie. */ + tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc); + } else if (foc->len > 0) { + ret = tcp_fastopen_cookie_gen_check(sk, req, skb, foc, + &valid_foc); + if (!ret) { + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); + } else { + /* Cookie is valid. Create a (full) child socket to + * accept the data in SYN before returning a SYN-ACK to + * ack the data. If we fail to create the socket, fall + * back and ack the ISN only but includes the same + * cookie. + * + * Note: Data-less SYN with valid cookie is allowed to + * send data in SYN_RECV state. + */ fastopen: - child = tcp_fastopen_create_child(sk, skb, req); - if (child) { - foc->len = -1; + child = tcp_fastopen_create_child(sk, skb, req); + if (child) { + if (ret == 2) { + valid_foc.exp = foc->exp; + *foc = valid_foc; + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVEALTKEY); + } else { + foc->len = -1; + } + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPFASTOPENPASSIVE); + return child; + } NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPFASTOPENPASSIVE); - return child; + LINUX_MIB_TCPFASTOPENPASSIVEFAIL); } - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - } else if (foc->len > 0) /* Client presents an invalid cookie */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVEFAIL); - + } valid_foc.exp = foc->exp; *foc = valid_foc; return NULL; diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 99a4e41d5249..4ce0bc1612f5 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -20,3 +20,4 @@ ip_defrag so_txtime flowlabel flowlabel_mgr +tcp_fastopen_backup_key diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 8343fb9d8a46..9a275d932fd5 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -10,12 +10,14 @@ TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh +TEST_PROGS += tcp_fastopen_backup_key.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr +TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c new file mode 100644 index 000000000000..58bb77d9e7e1 --- /dev/null +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Test key rotation for TFO. + * New keys are 'rotated' in two steps: + * 1) Add new key as the 'backup' key 'behind' the primary key + * 2) Make new key the primary by swapping the backup and primary keys + * + * The rotation is done in stages using multiple sockets bound + * to the same port via SO_REUSEPORT. This simulates key rotation + * behind say a load balancer. We verify that across the rotation + * there are no cases in which a cookie is not accepted by verifying + * that TcpExtTCPFastOpenPassiveFail remains 0. + */ +#define _GNU_SOURCE +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <unistd.h> +#include <netinet/tcp.h> +#include <fcntl.h> +#include <time.h> + +#ifndef TCP_FASTOPEN_KEY +#define TCP_FASTOPEN_KEY 33 +#endif + +#define N_LISTEN 10 +#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key" +#define KEY_LENGTH 16 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#endif + +static bool do_ipv6; +static bool do_sockopt; +static bool do_rotate; +static int key_len = KEY_LENGTH; +static int rcv_fds[N_LISTEN]; +static int proc_fd; +static const char *IP4_ADDR = "127.0.0.1"; +static const char *IP6_ADDR = "::1"; +static const int PORT = 8891; + +static void get_keys(int fd, uint32_t *keys) +{ + char buf[128]; + int len = KEY_LENGTH * 2; + + if (do_sockopt) { + if (getsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, &len)) + error(1, errno, "Unable to get key"); + return; + } + lseek(proc_fd, 0, SEEK_SET); + if (read(proc_fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to read %s", PROC_FASTOPEN_KEY); + if (sscanf(buf, "%x-%x-%x-%x,%x-%x-%x-%x", keys, keys + 1, keys + 2, + keys + 3, keys + 4, keys + 5, keys + 6, keys + 7) != 8) + error(1, 0, "Unable to parse %s", PROC_FASTOPEN_KEY); +} + +static void set_keys(int fd, uint32_t *keys) +{ + char buf[128]; + + if (do_sockopt) { + if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN_KEY, keys, + key_len)) + error(1, errno, "Unable to set key"); + return; + } + if (do_rotate) + snprintf(buf, 128, "%08x-%08x-%08x-%08x,%08x-%08x-%08x-%08x", + keys[0], keys[1], keys[2], keys[3], keys[4], keys[5], + keys[6], keys[7]); + else + snprintf(buf, 128, "%08x-%08x-%08x-%08x", + keys[0], keys[1], keys[2], keys[3]); + lseek(proc_fd, 0, SEEK_SET); + if (write(proc_fd, buf, sizeof(buf)) <= 0) + error(1, errno, "Unable to write %s", PROC_FASTOPEN_KEY); +} + +static void build_rcv_fd(int family, int proto, int *rcv_fds) +{ + struct sockaddr_in addr4 = {0}; + struct sockaddr_in6 addr6 = {0}; + struct sockaddr *addr; + int opt = 1, i, sz; + int qlen = 100; + uint32_t keys[8]; + + switch (family) { + case AF_INET: + addr4.sin_family = family; + addr4.sin_addr.s_addr = htonl(INADDR_ANY); + addr4.sin_port = htons(PORT); + sz = sizeof(addr4); + addr = (struct sockaddr *)&addr4; + break; + case AF_INET6: + addr6.sin6_family = AF_INET6; + addr6.sin6_addr = in6addr_any; + addr6.sin6_port = htons(PORT); + sz = sizeof(addr6); + addr = (struct sockaddr *)&addr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return; + } + for (i = 0; i < ARRAY_SIZE(keys); i++) + keys[i] = rand(); + for (i = 0; i < N_LISTEN; i++) { + rcv_fds[i] = socket(family, proto, 0); + if (rcv_fds[i] < 0) + error(1, errno, "failed to create receive socket"); + if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt, + sizeof(opt))) + error(1, errno, "failed to set SO_REUSEPORT"); + if (bind(rcv_fds[i], addr, sz)) + error(1, errno, "failed to bind receive socket"); + if (setsockopt(rcv_fds[i], SOL_TCP, TCP_FASTOPEN, &qlen, + sizeof(qlen))) + error(1, errno, "failed to set TCP_FASTOPEN"); + set_keys(rcv_fds[i], keys); + if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) + error(1, errno, "failed to listen on receive port"); + } +} + +static int connect_and_send(int family, int proto) +{ + struct sockaddr_in saddr4 = {0}; + struct sockaddr_in daddr4 = {0}; + struct sockaddr_in6 saddr6 = {0}; + struct sockaddr_in6 daddr6 = {0}; + struct sockaddr *saddr, *daddr; + int fd, sz, ret; + char data[1]; + + switch (family) { + case AF_INET: + saddr4.sin_family = AF_INET; + saddr4.sin_addr.s_addr = htonl(INADDR_ANY); + saddr4.sin_port = 0; + + daddr4.sin_family = AF_INET; + if (!inet_pton(family, IP4_ADDR, &daddr4.sin_addr.s_addr)) + error(1, errno, "inet_pton failed: %s", IP4_ADDR); + daddr4.sin_port = htons(PORT); + + sz = sizeof(saddr4); + saddr = (struct sockaddr *)&saddr4; + daddr = (struct sockaddr *)&daddr4; + break; + case AF_INET6: + saddr6.sin6_family = AF_INET6; + saddr6.sin6_addr = in6addr_any; + + daddr6.sin6_family = AF_INET6; + if (!inet_pton(family, IP6_ADDR, &daddr6.sin6_addr)) + error(1, errno, "inet_pton failed: %s", IP6_ADDR); + daddr6.sin6_port = htons(PORT); + + sz = sizeof(saddr6); + saddr = (struct sockaddr *)&saddr6; + daddr = (struct sockaddr *)&daddr6; + break; + default: + error(1, 0, "Unsupported family %d", family); + /* clang does not recognize error() above as terminating + * the program, so it complains that saddr, daddr, sz are + * not initialized when this code path is taken. Silence it. + */ + return -1; + } + fd = socket(family, proto, 0); + if (fd < 0) + error(1, errno, "failed to create send socket"); + if (bind(fd, saddr, sz)) + error(1, errno, "failed to bind send socket"); + data[0] = 'a'; + ret = sendto(fd, data, 1, MSG_FASTOPEN, daddr, sz); + if (ret != 1) + error(1, errno, "failed to sendto"); + + return fd; +} + +static bool is_listen_fd(int fd) +{ + int i; + + for (i = 0; i < N_LISTEN; i++) { + if (rcv_fds[i] == fd) + return true; + } + return false; +} + +static int rotate_key(int fd) +{ + static int iter; + static uint32_t new_key[4]; + uint32_t keys[8]; + uint32_t tmp_key[4]; + int i; + int len = KEY_LENGTH * 2; + + if (iter < N_LISTEN) { + /* first set new key as backups */ + if (iter == 0) { + for (i = 0; i < ARRAY_SIZE(new_key); i++) + new_key[i] = rand(); + } + get_keys(fd, keys); + memcpy(keys + 4, new_key, KEY_LENGTH); + set_keys(fd, keys); + } else { + /* swap the keys */ + get_keys(fd, keys); + memcpy(tmp_key, keys + 4, KEY_LENGTH); + memcpy(keys + 4, keys, KEY_LENGTH); + memcpy(keys, tmp_key, KEY_LENGTH); + set_keys(fd, keys); + } + if (++iter >= (N_LISTEN * 2)) + iter = 0; +} + +static void run_one_test(int family) +{ + struct epoll_event ev; + int i, send_fd; + int n_loops = 10000; + int rotate_key_fd = 0; + int key_rotate_interval = 50; + int fd, epfd; + char buf[1]; + + build_rcv_fd(family, SOCK_STREAM, rcv_fds); + epfd = epoll_create(1); + if (epfd < 0) + error(1, errno, "failed to create epoll"); + ev.events = EPOLLIN; + for (i = 0; i < N_LISTEN; i++) { + ev.data.fd = rcv_fds[i]; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev)) + error(1, errno, "failed to register sock epoll"); + } + while (n_loops--) { + send_fd = connect_and_send(family, SOCK_STREAM); + if (do_rotate && ((n_loops % key_rotate_interval) == 0)) { + rotate_key(rcv_fds[rotate_key_fd]); + if (++rotate_key_fd >= N_LISTEN) + rotate_key_fd = 0; + } + while (1) { + i = epoll_wait(epfd, &ev, 1, -1); + if (i < 0) + error(1, errno, "epoll_wait failed"); + if (is_listen_fd(ev.data.fd)) { + fd = accept(ev.data.fd, NULL, NULL); + if (fd < 0) + error(1, errno, "failed to accept"); + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "failed epoll add"); + continue; + } + i = recv(ev.data.fd, buf, sizeof(buf), 0); + if (i != 1) + error(1, errno, "failed recv data"); + if (epoll_ctl(epfd, EPOLL_CTL_DEL, ev.data.fd, NULL)) + error(1, errno, "failed epoll del"); + close(ev.data.fd); + break; + } + close(send_fd); + } + for (i = 0; i < N_LISTEN; i++) + close(rcv_fds[i]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "46sr")) != -1) { + switch (c) { + case '4': + do_ipv6 = false; + break; + case '6': + do_ipv6 = true; + break; + case 's': + do_sockopt = true; + break; + case 'r': + do_rotate = true; + key_len = KEY_LENGTH * 2; + break; + default: + error(1, 0, "%s: parse error", argv[0]); + } + } +} + +int main(int argc, char **argv) +{ + parse_opts(argc, argv); + proc_fd = open(PROC_FASTOPEN_KEY, O_RDWR); + if (proc_fd < 0) + error(1, errno, "Unable to open %s", PROC_FASTOPEN_KEY); + srand(time(NULL)); + if (do_ipv6) + run_one_test(AF_INET6); + else + run_one_test(AF_INET); + close(proc_fd); + fprintf(stderr, "PASS\n"); + return 0; +} diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.sh b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh new file mode 100755 index 000000000000..41476399e184 --- /dev/null +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# rotate TFO keys for ipv4/ipv6 and verify that the client does +# not present an invalid cookie. + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" sysctl -w net.ipv4.tcp_fastopen=3 \ + >/dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + # flush routes before each run, otherwise successive runs can + # initially present an old TFO cookie + ip netns exec "${NETNS}" ip tcp_metrics flush + ip netns exec "${NETNS}" ./tcp_fastopen_backup_key "$1" + val=$(ip netns exec "${NETNS}" nstat -az | \ + grep TcpExtTCPFastOpenPassiveFail | awk '{print $2}') + if [ $val -ne 0 ]; then + echo "FAIL: TcpExtTCPFastOpenPassiveFail non-zero" + return 1 + fi +} + +do_test "-4" +do_test "-6" +do_test "-4" +do_test "-6" +do_test "-4s" +do_test "-6s" +do_test "-4s" +do_test "-6s" +do_test "-4r" +do_test "-6r" +do_test "-4r" +do_test "-6r" +do_test "-4sr" +do_test "-6sr" +do_test "-4sr" +do_test "-6sr" +echo "all tests done" |