summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2015-03-16 15:55:47 -0400
committerDavid S. Miller <davem@davemloft.net>2015-03-16 15:55:47 -0400
commitb35f504a0c2631d57ebbb5b5b85a9055f62dce88 (patch)
treef7bf7650ead7d8af1e93d98114da5cd1bf658956
parentf00bbd219caa7b3cc3a51b5ce551d1bd56f19662 (diff)
parent13854e5a60461daee08ce99842b7f4d37553d911 (diff)
Merge branch 'listener_refactor'
Eric Dumazet says: ==================== inet: tcp listener refactoring, part 10 We are getting close to the point where request sockets will be hashed into generic hash table. Some followups are needed for netfilter and will be handled in next patch series. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/inet_connection_sock.h5
-rw-r--r--include/net/inet_sock.h5
-rw-r--r--include/net/request_sock.h13
-rw-r--r--include/net/sock.h9
-rw-r--r--net/core/request_sock.c4
-rw-r--r--net/core/sock.c15
-rw-r--r--net/ipv4/inet_connection_sock.c8
-rw-r--r--net/ipv4/inet_diag.c120
-rw-r--r--net/ipv4/inet_hashtables.c6
-rw-r--r--net/ipv4/syncookies.c10
-rw-r--r--net/ipv4/tcp_fastopen.c2
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv6/tcp_ipv6.c2
13 files changed, 99 insertions, 102 deletions
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index b9a6b0a94cc6..191feec60205 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -275,6 +275,11 @@ static inline void inet_csk_reqsk_queue_add(struct sock *sk,
struct sock *child)
{
reqsk_queue_add(&inet_csk(sk)->icsk_accept_queue, req, sk, child);
+ /* before letting lookups find us, make sure all req fields
+ * are committed to memory.
+ */
+ smp_wmb();
+ atomic_set(&req->rsk_refcnt, 1);
}
void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index b3053fdd871e..3d8c09abb097 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -255,6 +255,11 @@ static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops
ireq->opt = NULL;
atomic64_set(&ireq->ir_cookie, 0);
ireq->ireq_state = TCP_NEW_SYN_RECV;
+
+ /* Following is temporary. It is coupled with debugging
+ * helpers in reqsk_put() & reqsk_free()
+ */
+ atomic_set(&ireq->ireq_refcnt, 0);
}
return req;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 3275cf31f731..56dc2faba47e 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -82,19 +82,20 @@ static inline struct request_sock *inet_reqsk(struct sock *sk)
return (struct request_sock *)sk;
}
-static inline void __reqsk_free(struct request_sock *req)
-{
- kmem_cache_free(req->rsk_ops->slab, req);
-}
-
static inline void reqsk_free(struct request_sock *req)
{
+ /* temporary debugging */
+ WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 0);
+
req->rsk_ops->destructor(req);
- __reqsk_free(req);
+ kmem_cache_free(req->rsk_ops->slab, req);
}
static inline void reqsk_put(struct request_sock *req)
{
+ /* temporary debugging, until req sock are put into ehash table */
+ WARN_ON_ONCE(atomic_read(&req->rsk_refcnt) != 1);
+
if (atomic_dec_and_test(&req->rsk_refcnt))
reqsk_free(req);
}
diff --git a/include/net/sock.h b/include/net/sock.h
index f10832ca2e90..e0360f5a53e9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -67,6 +67,7 @@
#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>
+#include <net/tcp_states.h>
#include <linux/net_tstamp.h>
struct cgroup;
@@ -2218,6 +2219,14 @@ static inline struct sock *skb_steal_sock(struct sk_buff *skb)
return NULL;
}
+/* This helper checks if a socket is a full socket,
+ * ie _not_ a timewait or request socket.
+ */
+static inline bool sk_fullsock(const struct sock *sk)
+{
+ return (1 << sk->sk_state) & ~(TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV);
+}
+
void sock_enable_timestamp(struct sock *sk, int flag);
int sock_get_timestamp(struct sock *, struct timeval __user *);
int sock_get_timestampns(struct sock *, struct timespec __user *);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index 04db318e6218..e910317ef6d9 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -103,7 +103,7 @@ void reqsk_queue_destroy(struct request_sock_queue *queue)
while ((req = lopt->syn_table[i]) != NULL) {
lopt->syn_table[i] = req->dl_next;
lopt->qlen--;
- reqsk_free(req);
+ reqsk_put(req);
}
}
}
@@ -180,7 +180,7 @@ void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req,
*/
spin_unlock_bh(&fastopenq->lock);
sock_put(lsk);
- reqsk_free(req);
+ reqsk_put(req);
return;
}
/* Wait for 60secs before removing a req that has triggered RST.
diff --git a/net/core/sock.c b/net/core/sock.c
index 4bc42efb3e40..a950b54248da 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1661,21 +1661,6 @@ void sock_efree(struct sk_buff *skb)
}
EXPORT_SYMBOL(sock_efree);
-#ifdef CONFIG_INET
-void sock_edemux(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- if (sk->sk_state == TCP_TIME_WAIT)
- inet_twsk_put(inet_twsk(sk));
- else if (sk->sk_state == TCP_NEW_SYN_RECV)
- reqsk_put(inet_reqsk(sk));
- else
- sock_put(sk);
-}
-EXPORT_SYMBOL(sock_edemux);
-#endif
-
kuid_t sock_i_uid(struct sock *sk)
{
kuid_t uid;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 34581f928afa..3390ba6f96b2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -340,7 +340,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err)
out:
release_sock(sk);
if (req)
- __reqsk_free(req);
+ reqsk_put(req);
return newsk;
out_err:
newsk = NULL;
@@ -635,7 +635,7 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
/* Drop this request */
inet_csk_reqsk_queue_unlink(parent, req, reqp);
reqsk_queue_removed(queue, req);
- reqsk_free(req);
+ reqsk_put(req);
continue;
}
reqp = &req->dl_next;
@@ -837,7 +837,7 @@ void inet_csk_listen_stop(struct sock *sk)
sock_put(child);
sk_acceptq_removed(sk);
- __reqsk_free(req);
+ reqsk_put(req);
}
if (queue->fastopenq != NULL) {
/* Free all the reqs queued in rskq_rst_head. */
@@ -847,7 +847,7 @@ void inet_csk_listen_stop(struct sock *sk)
spin_unlock_bh(&queue->fastopenq->lock);
while ((req = acc_req) != NULL) {
acc_req = req->dl_next;
- __reqsk_free(req);
+ reqsk_put(req);
}
}
WARN_ON(sk->sk_ack_backlog);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index ac7b5c909fe7..e7ba59038c8d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -113,14 +113,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
return -EMSGSIZE;
r = nlmsg_data(nlh);
- BUG_ON((1 << sk->sk_state) & (TCPF_TIME_WAIT | TCPF_NEW_SYN_RECV));
+ BUG_ON(!sk_fullsock(sk));
inet_diag_msg_common_fill(r, sk);
r->idiag_state = sk->sk_state;
r->idiag_timer = 0;
r->idiag_retrans = 0;
-
if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown))
goto errout;
@@ -229,7 +228,6 @@ static int inet_csk_diag_fill(struct sock *sk,
static int inet_twsk_diag_fill(struct sock *sk,
struct sk_buff *skb,
- const struct inet_diag_req_v2 *req,
u32 portid, u32 seq, u16 nlmsg_flags,
const struct nlmsghdr *unlh)
{
@@ -265,6 +263,39 @@ static int inet_twsk_diag_fill(struct sock *sk,
return 0;
}
+static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb,
+ u32 portid, u32 seq, u16 nlmsg_flags,
+ const struct nlmsghdr *unlh)
+{
+ struct inet_diag_msg *r;
+ struct nlmsghdr *nlh;
+ long tmo;
+
+ nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
+ nlmsg_flags);
+ if (!nlh)
+ return -EMSGSIZE;
+
+ r = nlmsg_data(nlh);
+ inet_diag_msg_common_fill(r, sk);
+ r->idiag_state = TCP_SYN_RECV;
+ r->idiag_timer = 1;
+ r->idiag_retrans = inet_reqsk(sk)->num_retrans;
+
+ BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
+ offsetof(struct sock, sk_cookie));
+
+ tmo = inet_reqsk(sk)->expires - jiffies;
+ r->idiag_expires = (tmo >= 0) ? jiffies_to_msecs(tmo) : 0;
+ r->idiag_rqueue = 0;
+ r->idiag_wqueue = 0;
+ r->idiag_uid = 0;
+ r->idiag_inode = 0;
+
+ nlmsg_end(skb, nlh);
+ return 0;
+}
+
static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct inet_diag_req_v2 *r,
struct user_namespace *user_ns,
@@ -272,9 +303,13 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
const struct nlmsghdr *unlh)
{
if (sk->sk_state == TCP_TIME_WAIT)
- return inet_twsk_diag_fill(sk, skb, r, portid, seq,
+ return inet_twsk_diag_fill(sk, skb, portid, seq,
nlmsg_flags, unlh);
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return inet_req_diag_fill(sk, skb, portid, seq,
+ nlmsg_flags, unlh);
+
return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq,
nlmsg_flags, unlh);
}
@@ -502,7 +537,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
entry_fill_addrs(&entry, sk);
entry.sport = inet->inet_num;
entry.dport = ntohs(inet->inet_dport);
- entry.userlocks = (sk->sk_state != TCP_TIME_WAIT) ? sk->sk_userlocks : 0;
+ entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0;
return inet_diag_bc_run(bc, &entry);
}
@@ -661,61 +696,6 @@ static void twsk_build_assert(void)
#endif
}
-static int inet_twsk_diag_dump(struct sock *sk,
- struct sk_buff *skb,
- struct netlink_callback *cb,
- const struct inet_diag_req_v2 *r,
- const struct nlattr *bc)
-{
- twsk_build_assert();
-
- if (!inet_diag_bc_sk(bc, sk))
- return 0;
-
- return inet_twsk_diag_fill(sk, skb, r,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh);
-}
-
-static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
- struct request_sock *req,
- struct user_namespace *user_ns,
- u32 portid, u32 seq,
- const struct nlmsghdr *unlh)
-{
- const struct inet_request_sock *ireq = inet_rsk(req);
- struct inet_diag_msg *r;
- struct nlmsghdr *nlh;
- long tmo;
-
- nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r),
- NLM_F_MULTI);
- if (!nlh)
- return -EMSGSIZE;
-
- r = nlmsg_data(nlh);
- inet_diag_msg_common_fill(r, (struct sock *)ireq);
- r->idiag_state = TCP_SYN_RECV;
- r->idiag_timer = 1;
- r->idiag_retrans = req->num_retrans;
-
- BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) !=
- offsetof(struct sock, sk_cookie));
-
- tmo = req->expires - jiffies;
- if (tmo < 0)
- tmo = 0;
-
- r->idiag_expires = jiffies_to_msecs(tmo);
- r->idiag_rqueue = 0;
- r->idiag_wqueue = 0;
- r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
- r->idiag_inode = 0;
-
- nlmsg_end(skb, nlh);
- return 0;
-}
-
static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
struct netlink_callback *cb,
const struct inet_diag_req_v2 *r,
@@ -769,10 +749,10 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
continue;
}
- err = inet_diag_fill_req(skb, sk, req,
- sk_user_ns(NETLINK_CB(cb->skb).sk),
+ err = inet_req_diag_fill((struct sock *)req, skb,
NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, cb->nlh);
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb->nlh);
if (err < 0) {
cb->args[3] = j + 1;
cb->args[4] = reqnum;
@@ -903,10 +883,16 @@ skip_listen_ht:
if (r->id.idiag_dport != sk->sk_dport &&
r->id.idiag_dport)
goto next_normal;
- if (sk->sk_state == TCP_TIME_WAIT)
- res = inet_twsk_diag_dump(sk, skb, cb, r, bc);
- else
- res = inet_csk_diag_dump(sk, skb, cb, r, bc);
+ twsk_build_assert();
+
+ if (!inet_diag_bc_sk(bc, sk))
+ goto next_normal;
+
+ res = sk_diag_fill(sk, skb, r,
+ sk_user_ns(NETLINK_CB(cb->skb).sk),
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->nlh);
if (res < 0) {
spin_unlock_bh(lock);
goto done;
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 64401a2fdd33..c28bca4cc15b 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -269,6 +269,12 @@ void sock_gen_put(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sock_gen_put);
+void sock_edemux(struct sk_buff *skb)
+{
+ sock_gen_put(skb->sk);
+}
+EXPORT_SYMBOL(sock_edemux);
+
struct sock *__inet_lookup_established(struct net *net,
struct inet_hashinfo *hashinfo,
const __be32 saddr, const __be16 sport,
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index f17db898ed26..5ae0c49f5e2e 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -219,9 +219,9 @@ int __cookie_v4_check(const struct iphdr *iph, const struct tcphdr *th,
}
EXPORT_SYMBOL_GPL(__cookie_v4_check);
-static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
- struct request_sock *req,
- struct dst_entry *dst)
+static struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
+ struct request_sock *req,
+ struct dst_entry *dst)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct sock *child;
@@ -357,7 +357,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
ireq->opt = tcp_v4_save_options(skb);
if (security_inet_conn_request(sk, skb, req)) {
- reqsk_free(req);
+ reqsk_put(req);
goto out;
}
@@ -378,7 +378,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
security_req_classify_flow(req, flowi4_to_flowi(&fl4));
rt = ip_route_output_key(sock_net(sk), &fl4);
if (IS_ERR(rt)) {
- reqsk_free(req);
+ reqsk_put(req);
goto out;
}
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index fe77417fc137..84381319e1bc 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -253,7 +253,7 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
fastopenq->rskq_rst_head = req1->dl_next;
fastopenq->qlen--;
spin_unlock(&fastopenq->lock);
- reqsk_free(req1);
+ reqsk_put(req1);
}
return true;
}
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 1f514a0c5e60..80067d5858b4 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
- if (sk->sk_state != TCP_TIME_WAIT) {
+ if (sk_fullsock(sk)) {
struct dst_entry *dst = sk->sk_rx_dst;
if (dst)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d89f028dc8c4..e4761b22307b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1583,7 +1583,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)
if (sk) {
skb->sk = sk;
skb->destructor = sock_edemux;
- if (sk->sk_state != TCP_TIME_WAIT) {
+ if (sk_fullsock(sk)) {
struct dst_entry *dst = sk->sk_rx_dst;
if (dst)