summaryrefslogtreecommitdiff
path: root/net/tipc/socket.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 21:40:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-10-08 21:40:54 -0400
commit35a9ad8af0bb0fa3525e6d0d20e32551d226f38e (patch)
tree15b4b33206818886d9cff371fd2163e073b70568 /net/tipc/socket.c
parentd5935b07da53f74726e2a65dd4281d0f2c70e5d4 (diff)
parent64b1f00a0830e1c53874067273a096b228d83d36 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Most notable changes in here: 1) By far the biggest accomplishment, thanks to a large range of contributors, is the addition of multi-send for transmit. This is the result of discussions back in Chicago, and the hard work of several individuals. Now, when the ->ndo_start_xmit() method of a driver sees skb->xmit_more as true, it can choose to defer the doorbell telling the driver to start processing the new TX queue entires. skb->xmit_more means that the generic networking is guaranteed to call the driver immediately with another SKB to send. There is logic added to the qdisc layer to dequeue multiple packets at a time, and the handling mis-predicted offloads in software is now done with no locks held. Finally, pktgen is extended to have a "burst" parameter that can be used to test a multi-send implementation. Several drivers have xmit_more support: i40e, igb, ixgbe, mlx4, virtio_net Adding support is almost trivial, so export more drivers to support this optimization soon. I want to thank, in no particular or implied order, Jesper Dangaard Brouer, Eric Dumazet, Alexander Duyck, Tom Herbert, Jamal Hadi Salim, John Fastabend, Florian Westphal, Daniel Borkmann, David Tat, Hannes Frederic Sowa, and Rusty Russell. 2) PTP and timestamping support in bnx2x, from Michal Kalderon. 3) Allow adjusting the rx_copybreak threshold for a driver via ethtool, and add rx_copybreak support to enic driver. From Govindarajulu Varadarajan. 4) Significant enhancements to the generic PHY layer and the bcm7xxx driver in particular (EEE support, auto power down, etc.) from Florian Fainelli. 5) Allow raw buffers to be used for flow dissection, allowing drivers to determine the optimal "linear pull" size for devices that DMA into pools of pages. The objective is to get exactly the necessary amount of headers into the linear SKB area pre-pulled, but no more. The new interface drivers use is eth_get_headlen(). From WANG Cong, with driver conversions (several had their own by-hand duplicated implementations) by Alexander Duyck and Eric Dumazet. 6) Support checksumming more smoothly and efficiently for encapsulations, and add "foo over UDP" facility. From Tom Herbert. 7) Add Broadcom SF2 switch driver to DSA layer, from Florian Fainelli. 8) eBPF now can load programs via a system call and has an extensive testsuite. Alexei Starovoitov and Daniel Borkmann. 9) Major overhaul of the packet scheduler to use RCU in several major areas such as the classifiers and rate estimators. From John Fastabend. 10) Add driver for Intel FM10000 Ethernet Switch, from Alexander Duyck. 11) Rearrange TCP_SKB_CB() to reduce cache line misses, from Eric Dumazet. 12) Add Datacenter TCP congestion control algorithm support, From Florian Westphal. 13) Reorganize sk_buff so that __copy_skb_header() is significantly faster. From Eric Dumazet" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1558 commits) netlabel: directly return netlbl_unlabel_genl_init() net: add netdev_txq_bql_{enqueue, complete}_prefetchw() helpers net: description of dma_cookie cause make xmldocs warning cxgb4: clean up a type issue cxgb4: potential shift wrapping bug i40e: skb->xmit_more support net: fs_enet: Add NAPI TX net: fs_enet: Remove non NAPI RX r8169:add support for RTL8168EP net_sched: copy exts->type in tcf_exts_change() wimax: convert printk to pr_foo() af_unix: remove 0 assignment on static ipv6: Do not warn for informational ICMP messages, regardless of type. Update Intel Ethernet Driver maintainers list bridge: Save frag_max_size between PRE_ROUTING and POST_ROUTING tipc: fix bug in multicast congestion handling net: better IFF_XMIT_DST_RELEASE support net/mlx4_en: remove NETDEV_TX_BUSY 3c59x: fix bad split of cpu_to_le32(pci_map_single()) net: bcmgenet: fix Tx ring priority programming ...
Diffstat (limited to 'net/tipc/socket.c')
-rw-r--r--net/tipc/socket.c884
1 files changed, 740 insertions, 144 deletions
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index ff8c8118d56e..75275c5cf929 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,17 +35,67 @@
*/
#include "core.h"
-#include "port.h"
#include "name_table.h"
#include "node.h"
#include "link.h"
#include <linux/export.h>
+#include "config.h"
+#include "socket.h"
#define SS_LISTENING -1 /* socket is listening */
#define SS_READY -2 /* socket is connectionless */
-#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
-#define TIPC_FWD_MSG 1
+#define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */
+#define CONN_PROBING_INTERVAL 3600000 /* [ms] => 1 h */
+#define TIPC_FWD_MSG 1
+#define TIPC_CONN_OK 0
+#define TIPC_CONN_PROBING 1
+
+/**
+ * struct tipc_sock - TIPC socket structure
+ * @sk: socket - interacts with 'port' and with user via the socket API
+ * @connected: non-zero if port is currently connected to a peer port
+ * @conn_type: TIPC type used when connection was established
+ * @conn_instance: TIPC instance used when connection was established
+ * @published: non-zero if port has one or more associated names
+ * @max_pkt: maximum packet size "hint" used when building messages sent by port
+ * @ref: unique reference to port in TIPC object registry
+ * @phdr: preformatted message header used when sending messages
+ * @port_list: adjacent ports in TIPC's global list of ports
+ * @publications: list of publications for port
+ * @pub_count: total # of publications port has made during its lifetime
+ * @probing_state:
+ * @probing_interval:
+ * @timer:
+ * @port: port - interacts with 'sk' and with the rest of the TIPC stack
+ * @peer_name: the peer of the connection, if any
+ * @conn_timeout: the time we can wait for an unresponded setup request
+ * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue
+ * @link_cong: non-zero if owner must sleep because of link congestion
+ * @sent_unacked: # messages sent by socket, and not yet acked by peer
+ * @rcv_unacked: # messages read by user, but not yet acked back to peer
+ */
+struct tipc_sock {
+ struct sock sk;
+ int connected;
+ u32 conn_type;
+ u32 conn_instance;
+ int published;
+ u32 max_pkt;
+ u32 ref;
+ struct tipc_msg phdr;
+ struct list_head sock_list;
+ struct list_head publications;
+ u32 pub_count;
+ u32 probing_state;
+ u32 probing_interval;
+ struct timer_list timer;
+ uint conn_timeout;
+ atomic_t dupl_rcvcnt;
+ bool link_cong;
+ uint sent_unacked;
+ uint rcv_unacked;
+};
static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb);
static void tipc_data_ready(struct sock *sk);
@@ -53,6 +103,16 @@ static void tipc_write_space(struct sock *sk);
static int tipc_release(struct socket *sock);
static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags);
static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p);
+static void tipc_sk_timeout(unsigned long ref);
+static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq);
+static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq);
+static u32 tipc_sk_ref_acquire(struct tipc_sock *tsk);
+static void tipc_sk_ref_discard(u32 ref);
+static struct tipc_sock *tipc_sk_get(u32 ref);
+static struct tipc_sock *tipc_sk_get_next(u32 *ref);
+static void tipc_sk_put(struct tipc_sock *tsk);
static const struct proto_ops packet_ops;
static const struct proto_ops stream_ops;
@@ -106,24 +166,75 @@ static struct proto tipc_proto_kern;
* - port reference
*/
-#include "socket.h"
+static u32 tsk_peer_node(struct tipc_sock *tsk)
+{
+ return msg_destnode(&tsk->phdr);
+}
+
+static u32 tsk_peer_port(struct tipc_sock *tsk)
+{
+ return msg_destport(&tsk->phdr);
+}
+
+static bool tsk_unreliable(struct tipc_sock *tsk)
+{
+ return msg_src_droppable(&tsk->phdr) != 0;
+}
+
+static void tsk_set_unreliable(struct tipc_sock *tsk, bool unreliable)
+{
+ msg_set_src_droppable(&tsk->phdr, unreliable ? 1 : 0);
+}
+
+static bool tsk_unreturnable(struct tipc_sock *tsk)
+{
+ return msg_dest_droppable(&tsk->phdr) != 0;
+}
+
+static void tsk_set_unreturnable(struct tipc_sock *tsk, bool unreturnable)
+{
+ msg_set_dest_droppable(&tsk->phdr, unreturnable ? 1 : 0);
+}
+
+static int tsk_importance(struct tipc_sock *tsk)
+{
+ return msg_importance(&tsk->phdr);
+}
+
+static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+{
+ if (imp > TIPC_CRITICAL_IMPORTANCE)
+ return -EINVAL;
+ msg_set_importance(&tsk->phdr, (u32)imp);
+ return 0;
+}
+
+static struct tipc_sock *tipc_sk(const struct sock *sk)
+{
+ return container_of(sk, struct tipc_sock, sk);
+}
+
+static int tsk_conn_cong(struct tipc_sock *tsk)
+{
+ return tsk->sent_unacked >= TIPC_FLOWCTRL_WIN;
+}
/**
- * advance_rx_queue - discard first buffer in socket receive queue
+ * tsk_advance_rx_queue - discard first buffer in socket receive queue
*
* Caller must hold socket lock
*/
-static void advance_rx_queue(struct sock *sk)
+static void tsk_advance_rx_queue(struct sock *sk)
{
kfree_skb(__skb_dequeue(&sk->sk_receive_queue));
}
/**
- * reject_rx_queue - reject all buffers in socket receive queue
+ * tsk_rej_rx_queue - reject all buffers in socket receive queue
*
* Caller must hold socket lock
*/
-static void reject_rx_queue(struct sock *sk)
+static void tsk_rej_rx_queue(struct sock *sk)
{
struct sk_buff *buf;
u32 dnode;
@@ -134,6 +245,38 @@ static void reject_rx_queue(struct sock *sk)
}
}
+/* tsk_peer_msg - verify if message was sent by connected port's peer
+ *
+ * Handles cases where the node's network address has changed from
+ * the default of <0.0.0> to its configured setting.
+ */
+static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg)
+{
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 orig_node;
+ u32 peer_node;
+
+ if (unlikely(!tsk->connected))
+ return false;
+
+ if (unlikely(msg_origport(msg) != peer_port))
+ return false;
+
+ orig_node = msg_orignode(msg);
+ peer_node = tsk_peer_node(tsk);
+
+ if (likely(orig_node == peer_node))
+ return true;
+
+ if (!orig_node && (peer_node == tipc_own_addr))
+ return true;
+
+ if (!peer_node && (orig_node == tipc_own_addr))
+ return true;
+
+ return false;
+}
+
/**
* tipc_sk_create - create a TIPC socket
* @net: network namespace (must be default network)
@@ -153,7 +296,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
socket_state state;
struct sock *sk;
struct tipc_sock *tsk;
- struct tipc_port *port;
+ struct tipc_msg *msg;
u32 ref;
/* Validate arguments */
@@ -188,20 +331,24 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
return -ENOMEM;
tsk = tipc_sk(sk);
- port = &tsk->port;
-
- ref = tipc_port_init(port, TIPC_LOW_IMPORTANCE);
+ ref = tipc_sk_ref_acquire(tsk);
if (!ref) {
- pr_warn("Socket registration failed, ref. table exhausted\n");
- sk_free(sk);
+ pr_warn("Socket create failed; reference table exhausted\n");
return -ENOMEM;
}
+ tsk->max_pkt = MAX_PKT_DEFAULT;
+ tsk->ref = ref;
+ INIT_LIST_HEAD(&tsk->publications);
+ msg = &tsk->phdr;
+ tipc_msg_init(msg, TIPC_LOW_IMPORTANCE, TIPC_NAMED_MSG,
+ NAMED_H_SIZE, 0);
+ msg_set_origport(msg, ref);
/* Finish initializing socket data structures */
sock->ops = ops;
sock->state = state;
-
sock_init_data(sock, sk);
+ k_init_timer(&tsk->timer, (Handler)tipc_sk_timeout, ref);
sk->sk_backlog_rcv = tipc_backlog_rcv;
sk->sk_rcvbuf = sysctl_tipc_rmem[1];
sk->sk_data_ready = tipc_data_ready;
@@ -209,12 +356,11 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
tsk->conn_timeout = CONN_TIMEOUT_DEFAULT;
tsk->sent_unacked = 0;
atomic_set(&tsk->dupl_rcvcnt, 0);
- tipc_port_unlock(port);
if (sock->state == SS_READY) {
- tipc_port_set_unreturnable(port, true);
+ tsk_set_unreturnable(tsk, true);
if (sock->type == SOCK_DGRAM)
- tipc_port_set_unreliable(port, true);
+ tsk_set_unreliable(tsk, true);
}
return 0;
}
@@ -308,7 +454,6 @@ static int tipc_release(struct socket *sock)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk;
- struct tipc_port *port;
struct sk_buff *buf;
u32 dnode;
@@ -320,13 +465,13 @@ static int tipc_release(struct socket *sock)
return 0;
tsk = tipc_sk(sk);
- port = &tsk->port;
lock_sock(sk);
/*
* Reject all unreceived messages, except on an active connection
* (which disconnects locally & sends a 'FIN+' to peer)
*/
+ dnode = tsk_peer_node(tsk);
while (sock->state != SS_DISCONNECTING) {
buf = __skb_dequeue(&sk->sk_receive_queue);
if (buf == NULL)
@@ -337,17 +482,27 @@ static int tipc_release(struct socket *sock)
if ((sock->state == SS_CONNECTING) ||
(sock->state == SS_CONNECTED)) {
sock->state = SS_DISCONNECTING;
- tipc_port_disconnect(port->ref);
+ tsk->connected = 0;
+ tipc_node_remove_conn(dnode, tsk->ref);
}
if (tipc_msg_reverse(buf, &dnode, TIPC_ERR_NO_PORT))
tipc_link_xmit(buf, dnode, 0);
}
}
- /* Destroy TIPC port; also disconnects an active connection and
- * sends a 'FIN-' to peer.
- */
- tipc_port_destroy(port);
+ tipc_sk_withdraw(tsk, 0, NULL);
+ tipc_sk_ref_discard(tsk->ref);
+ k_cancel_timer(&tsk->timer);
+ if (tsk->connected) {
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, dnode, tipc_own_addr,
+ tsk_peer_port(tsk),
+ tsk->ref, TIPC_ERR_NO_PORT);
+ if (buf)
+ tipc_link_xmit(buf, dnode, tsk->ref);
+ tipc_node_remove_conn(dnode, tsk->ref);
+ }
+ k_term_timer(&tsk->timer);
/* Discard any remaining (connection-based) messages in receive queue */
__skb_queue_purge(&sk->sk_receive_queue);
@@ -355,7 +510,6 @@ static int tipc_release(struct socket *sock)
/* Reject any messages that accumulated in backlog queue */
sock->state = SS_DISCONNECTING;
release_sock(sk);
-
sock_put(sk);
sock->sk = NULL;
@@ -387,7 +541,7 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
lock_sock(sk);
if (unlikely(!uaddr_len)) {
- res = tipc_withdraw(&tsk->port, 0, NULL);
+ res = tipc_sk_withdraw(tsk, 0, NULL);
goto exit;
}
@@ -415,8 +569,8 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr,
}
res = (addr->scope > 0) ?
- tipc_publish(&tsk->port, addr->scope, &addr->addr.nameseq) :
- tipc_withdraw(&tsk->port, -addr->scope, &addr->addr.nameseq);
+ tipc_sk_publish(tsk, addr->scope, &addr->addr.nameseq) :
+ tipc_sk_withdraw(tsk, -addr->scope, &addr->addr.nameseq);
exit:
release_sock(sk);
return res;
@@ -446,10 +600,10 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
if ((sock->state != SS_CONNECTED) &&
((peer != 2) || (sock->state != SS_DISCONNECTING)))
return -ENOTCONN;
- addr->addr.id.ref = tipc_port_peerport(&tsk->port);
- addr->addr.id.node = tipc_port_peernode(&tsk->port);
+ addr->addr.id.ref = tsk_peer_port(tsk);
+ addr->addr.id.node = tsk_peer_node(tsk);
} else {
- addr->addr.id.ref = tsk->port.ref;
+ addr->addr.id.ref = tsk->ref;
addr->addr.id.node = tipc_own_addr;
}
@@ -518,7 +672,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock,
break;
case SS_READY:
case SS_CONNECTED:
- if (!tsk->link_cong && !tipc_sk_conn_cong(tsk))
+ if (!tsk->link_cong && !tsk_conn_cong(tsk))
mask |= POLLOUT;
/* fall thru' */
case SS_CONNECTING:
@@ -549,7 +703,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq,
struct iovec *iov, size_t dsz, long timeo)
{
struct sock *sk = sock->sk;
- struct tipc_msg *mhdr = &tipc_sk(sk)->port.phdr;
+ struct tipc_msg *mhdr = &tipc_sk(sk)->phdr;
struct sk_buff *buf;
uint mtu;
int rc;
@@ -579,6 +733,7 @@ new_mtu:
goto new_mtu;
if (rc != -ELINKCONG)
break;
+ tipc_sk(sk)->link_cong = 1;
rc = tipc_wait_for_sndmsg(sock, &timeo);
if (rc)
kfree_skb_list(buf);
@@ -638,20 +793,19 @@ static int tipc_sk_proto_rcv(struct tipc_sock *tsk, u32 *dnode,
struct sk_buff *buf)
{
struct tipc_msg *msg = buf_msg(buf);
- struct tipc_port *port = &tsk->port;
int conn_cong;
/* Ignore if connection cannot be validated: */
- if (!port->connected || !tipc_port_peer_msg(port, msg))
+ if (!tsk_peer_msg(tsk, msg))
goto exit;
- port->probing_state = TIPC_CONN_OK;
+ tsk->probing_state = TIPC_CONN_OK;
if (msg_type(msg) == CONN_ACK) {
- conn_cong = tipc_sk_conn_cong(tsk);
+ conn_cong = tsk_conn_cong(tsk);
tsk->sent_unacked -= msg_msgcnt(msg);
if (conn_cong)
- tipc_sock_wakeup(tsk);
+ tsk->sk.sk_write_space(&tsk->sk);
} else if (msg_type(msg) == CONN_PROBE) {
if (!tipc_msg_reverse(buf, dnode, TIPC_OK))
return TIPC_OK;
@@ -742,8 +896,7 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
- struct tipc_msg *mhdr = &port->phdr;
+ struct tipc_msg *mhdr = &tsk->phdr;
struct iovec *iov = m->msg_iov;
u32 dnode, dport;
struct sk_buff *buf;
@@ -774,13 +927,13 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
rc = -EISCONN;
goto exit;
}
- if (tsk->port.published) {
+ if (tsk->published) {
rc = -EOPNOTSUPP;
goto exit;
}
if (dest->addrtype == TIPC_ADDR_NAME) {
- tsk->port.conn_type = dest->addr.name.name.type;
- tsk->port.conn_instance = dest->addr.name.name.instance;
+ tsk->conn_type = dest->addr.name.name.type;
+ tsk->conn_instance = dest->addr.name.name.instance;
}
}
rc = dest_name_check(dest, m);
@@ -820,13 +973,14 @@ static int tipc_sendmsg(struct kiocb *iocb, struct socket *sock,
}
new_mtu:
- mtu = tipc_node_get_mtu(dnode, tsk->port.ref);
+ mtu = tipc_node_get_mtu(dnode, tsk->ref);
rc = tipc_msg_build(mhdr, iov, 0, dsz, mtu, &buf);
if (rc < 0)
goto exit;
do {
- rc = tipc_link_xmit(buf, dnode, tsk->port.ref);
+ TIPC_SKB_CB(buf)->wakeup_pending = tsk->link_cong;
+ rc = tipc_link_xmit(buf, dnode, tsk->ref);
if (likely(rc >= 0)) {
if (sock->state != SS_READY)
sock->state = SS_CONNECTING;
@@ -835,10 +989,9 @@ new_mtu:
}
if (rc == -EMSGSIZE)
goto new_mtu;
-
if (rc != -ELINKCONG)
break;
-
+ tsk->link_cong = 1;
rc = tipc_wait_for_sndmsg(sock, &timeo);
if (rc)
kfree_skb_list(buf);
@@ -873,8 +1026,8 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p)
prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
done = sk_wait_event(sk, timeo_p,
(!tsk->link_cong &&
- !tipc_sk_conn_cong(tsk)) ||
- !tsk->port.connected);
+ !tsk_conn_cong(tsk)) ||
+ !tsk->connected);
finish_wait(sk_sleep(sk), &wait);
} while (!done);
return 0;
@@ -897,11 +1050,10 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
- struct tipc_msg *mhdr = &port->phdr;
+ struct tipc_msg *mhdr = &tsk->phdr;
struct sk_buff *buf;
DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name);
- u32 ref = port->ref;
+ u32 ref = tsk->ref;
int rc = -EINVAL;
long timeo;
u32 dnode;
@@ -929,16 +1081,16 @@ static int tipc_send_stream(struct kiocb *iocb, struct socket *sock,
}
timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT);
- dnode = tipc_port_peernode(port);
+ dnode = tsk_peer_node(tsk);
next:
- mtu = port->max_pkt;
+ mtu = tsk->max_pkt;
send = min_t(uint, dsz - sent, TIPC_MAX_USER_MSG_SIZE);
rc = tipc_msg_build(mhdr, m->msg_iov, sent, send, mtu, &buf);
if (unlikely(rc < 0))
goto exit;
do {
- if (likely(!tipc_sk_conn_cong(tsk))) {
+ if (likely(!tsk_conn_cong(tsk))) {
rc = tipc_link_xmit(buf, dnode, ref);
if (likely(!rc)) {
tsk->sent_unacked++;
@@ -948,11 +1100,12 @@ next:
goto next;
}
if (rc == -EMSGSIZE) {
- port->max_pkt = tipc_node_get_mtu(dnode, ref);
+ tsk->max_pkt = tipc_node_get_mtu(dnode, ref);
goto next;
}
if (rc != -ELINKCONG)
break;
+ tsk->link_cong = 1;
}
rc = tipc_wait_for_sndpkt(sock, &timeo);
if (rc)
@@ -984,29 +1137,25 @@ static int tipc_send_packet(struct kiocb *iocb, struct socket *sock,
return tipc_send_stream(iocb, sock, m, dsz);
}
-/**
- * auto_connect - complete connection setup to a remote port
- * @tsk: tipc socket structure
- * @msg: peer's response message
- *
- * Returns 0 on success, errno otherwise
+/* tipc_sk_finish_conn - complete the setup of a connection
*/
-static int auto_connect(struct tipc_sock *tsk, struct tipc_msg *msg)
+static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port,
+ u32 peer_node)
{
- struct tipc_port *port = &tsk->port;
- struct socket *sock = tsk->sk.sk_socket;
- struct tipc_portid peer;
-
- peer.ref = msg_origport(msg);
- peer.node = msg_orignode(msg);
-
- __tipc_port_connect(port->ref, port, &peer);
-
- if (msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)
- return -EINVAL;
- msg_set_importance(&port->phdr, (u32)msg_importance(msg));
- sock->state = SS_CONNECTED;
- return 0;
+ struct tipc_msg *msg = &tsk->phdr;
+
+ msg_set_destnode(msg, peer_node);
+ msg_set_destport(msg, peer_port);
+ msg_set_type(msg, TIPC_CONN_MSG);
+ msg_set_lookup_scope(msg, 0);
+ msg_set_hdr_sz(msg, SHORT_H_SIZE);
+
+ tsk->probing_interval = CONN_PROBING_INTERVAL;
+ tsk->probing_state = TIPC_CONN_OK;
+ tsk->connected = 1;
+ k_start_timer(&tsk->timer, tsk->probing_interval);
+ tipc_node_add_conn(peer_node, tsk->ref, peer_port);
+ tsk->max_pkt = tipc_node_get_mtu(peer_node, tsk->ref);
}
/**
@@ -1033,17 +1182,17 @@ static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg)
}
/**
- * anc_data_recv - optionally capture ancillary data for received message
+ * tipc_sk_anc_data_recv - optionally capture ancillary data for received message
* @m: descriptor for message info
* @msg: received message header
- * @tport: TIPC port associated with message
+ * @tsk: TIPC port associated with message
*
* Note: Ancillary data is not captured if not requested by receiver.
*
* Returns 0 if successful, otherwise errno
*/
-static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
- struct tipc_port *tport)
+static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
+ struct tipc_sock *tsk)
{
u32 anc_data[3];
u32 err;
@@ -1086,10 +1235,10 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
anc_data[2] = msg_nameupper(msg);
break;
case TIPC_CONN_MSG:
- has_name = (tport->conn_type != 0);
- anc_data[0] = tport->conn_type;
- anc_data[1] = tport->conn_instance;
- anc_data[2] = tport->conn_instance;
+ has_name = (tsk->conn_type != 0);
+ anc_data[0] = tsk->conn_type;
+ anc_data[1] = tsk->conn_instance;
+ anc_data[2] = tsk->conn_instance;
break;
default:
has_name = 0;
@@ -1103,6 +1252,24 @@ static int anc_data_recv(struct msghdr *m, struct tipc_msg *msg,
return 0;
}
+static void tipc_sk_send_ack(struct tipc_sock *tsk, uint ack)
+{
+ struct sk_buff *buf = NULL;
+ struct tipc_msg *msg;
+ u32 peer_port = tsk_peer_port(tsk);
+ u32 dnode = tsk_peer_node(tsk);
+
+ if (!tsk->connected)
+ return;
+ buf = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode,
+ tipc_own_addr, peer_port, tsk->ref, TIPC_OK);
+ if (!buf)
+ return;
+ msg = buf_msg(buf);
+ msg_set_msgcnt(msg, ack);
+ tipc_link_xmit(buf, dnode, msg_link_selector(msg));
+}
+
static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop)
{
struct sock *sk = sock->sk;
@@ -1153,7 +1320,6 @@ static int tipc_recvmsg(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1188,7 +1354,7 @@ restart:
/* Discard an empty non-errored message & try again */
if ((!sz) && (!err)) {
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
goto restart;
}
@@ -1196,7 +1362,7 @@ restart:
set_orig_addr(m, msg);
/* Capture ancillary data (optional) */
- res = anc_data_recv(m, msg, port);
+ res = tipc_sk_anc_data_recv(m, msg, tsk);
if (res)
goto exit;
@@ -1223,10 +1389,10 @@ restart:
if (likely(!(flags & MSG_PEEK))) {
if ((sock->state != SS_READY) &&
(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_acknowledge(port->ref, tsk->rcv_unacked);
+ tipc_sk_send_ack(tsk, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
}
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
}
exit:
release_sock(sk);
@@ -1250,7 +1416,6 @@ static int tipc_recv_stream(struct kiocb *iocb, struct socket *sock,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
struct tipc_msg *msg;
long timeo;
@@ -1288,14 +1453,14 @@ restart:
/* Discard an empty non-errored message & try again */
if ((!sz) && (!err)) {
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
goto restart;
}
/* Optionally capture sender's address & ancillary data of first msg */
if (sz_copied == 0) {
set_orig_addr(m, msg);
- res = anc_data_recv(m, msg, port);
+ res = tipc_sk_anc_data_recv(m, msg, tsk);
if (res)
goto exit;
}
@@ -1334,10 +1499,10 @@ restart:
/* Consume received message (optional) */
if (likely(!(flags & MSG_PEEK))) {
if (unlikely(++tsk->rcv_unacked >= TIPC_CONNACK_INTV)) {
- tipc_acknowledge(port->ref, tsk->rcv_unacked);
+ tipc_sk_send_ack(tsk, tsk->rcv_unacked);
tsk->rcv_unacked = 0;
}
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
}
/* Loop around if more data is required */
@@ -1396,12 +1561,9 @@ static void tipc_data_ready(struct sock *sk)
static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
{
struct sock *sk = &tsk->sk;
- struct tipc_port *port = &tsk->port;
struct socket *sock = sk->sk_socket;
struct tipc_msg *msg = buf_msg(*buf);
-
int retval = -TIPC_ERR_NO_PORT;
- int res;
if (msg_mcast(msg))
return retval;
@@ -1409,16 +1571,23 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
switch ((int)sock->state) {
case SS_CONNECTED:
/* Accept only connection-based messages sent by peer */
- if (msg_connected(msg) && tipc_port_peer_msg(port, msg)) {
+ if (tsk_peer_msg(tsk, msg)) {
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
- __tipc_port_disconnect(port);
+ tsk->connected = 0;
+ /* let timer expire on it's own */
+ tipc_node_remove_conn(tsk_peer_node(tsk),
+ tsk->ref);
}
retval = TIPC_OK;
}
break;
case SS_CONNECTING:
/* Accept only ACK or NACK message */
+
+ if (unlikely(!msg_connected(msg)))
+ break;
+
if (unlikely(msg_errcode(msg))) {
sock->state = SS_DISCONNECTING;
sk->sk_err = ECONNREFUSED;
@@ -1426,17 +1595,17 @@ static int filter_connect(struct tipc_sock *tsk, struct sk_buff **buf)
break;
}
- if (unlikely(!msg_connected(msg)))
- break;
-
- res = auto_connect(tsk, msg);
- if (res) {
+ if (unlikely(msg_importance(msg) > TIPC_CRITICAL_IMPORTANCE)) {
sock->state = SS_DISCONNECTING;
- sk->sk_err = -res;
+ sk->sk_err = EINVAL;
retval = TIPC_OK;
break;
}
+ tipc_sk_finish_conn(tsk, msg_origport(msg), msg_orignode(msg));
+ msg_set_importance(&tsk->phdr, msg_importance(msg));
+ sock->state = SS_CONNECTED;
+
/* If an incoming message is an 'ACK-', it should be
* discarded here because it doesn't contain useful
* data. In addition, we should try to wake up
@@ -1518,6 +1687,13 @@ static int filter_rcv(struct sock *sk, struct sk_buff *buf)
if (unlikely(msg_user(msg) == CONN_MANAGER))
return tipc_sk_proto_rcv(tsk, &onode, buf);
+ if (unlikely(msg_user(msg) == SOCK_WAKEUP)) {
+ kfree_skb(buf);
+ tsk->link_cong = 0;
+ sk->sk_write_space(sk);
+ return TIPC_OK;
+ }
+
/* Reject message if it is wrong sort of message for socket */
if (msg_type(msg) > TIPC_DIRECT_MSG)
return -TIPC_ERR_NO_PORT;
@@ -1585,7 +1761,6 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *buf)
int tipc_sk_rcv(struct sk_buff *buf)
{
struct tipc_sock *tsk;
- struct tipc_port *port;
struct sock *sk;
u32 dport = msg_destport(buf_msg(buf));
int rc = TIPC_OK;
@@ -1593,13 +1768,11 @@ int tipc_sk_rcv(struct sk_buff *buf)
u32 dnode;
/* Validate destination and message */
- port = tipc_port_lock(dport);
- if (unlikely(!port)) {
+ tsk = tipc_sk_get(dport);
+ if (unlikely(!tsk)) {
rc = tipc_msg_eval(buf, &dnode);
goto exit;
}
-
- tsk = tipc_port_to_sock(port);
sk = &tsk->sk;
/* Queue message */
@@ -1615,8 +1788,7 @@ int tipc_sk_rcv(struct sk_buff *buf)
rc = -TIPC_ERR_OVERLOAD;
}
bh_unlock_sock(sk);
- tipc_port_unlock(port);
-
+ tipc_sk_put(tsk);
if (likely(!rc))
return 0;
exit:
@@ -1803,10 +1975,8 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
{
struct sock *new_sk, *sk = sock->sk;
struct sk_buff *buf;
- struct tipc_port *new_port;
+ struct tipc_sock *new_tsock;
struct tipc_msg *msg;
- struct tipc_portid peer;
- u32 new_ref;
long timeo;
int res;
@@ -1828,8 +1998,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
goto exit;
new_sk = new_sock->sk;
- new_port = &tipc_sk(new_sk)->port;
- new_ref = new_port->ref;
+ new_tsock = tipc_sk(new_sk);
msg = buf_msg(buf);
/* we lock on new_sk; but lockdep sees the lock on sk */
@@ -1839,18 +2008,16 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
* Reject any stray messages received by new socket
* before the socket lock was taken (very, very unlikely)
*/
- reject_rx_queue(new_sk);
+ tsk_rej_rx_queue(new_sk);
/* Connect new socket to it's peer */
- peer.ref = msg_origport(msg);
- peer.node = msg_orignode(msg);
- tipc_port_connect(new_ref, &peer);
+ tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
new_sock->state = SS_CONNECTED;
- tipc_port_set_importance(new_port, msg_importance(msg));
+ tsk_set_importance(new_tsock, msg_importance(msg));
if (msg_named(msg)) {
- new_port->conn_type = msg_nametype(msg);
- new_port->conn_instance = msg_nameinst(msg);
+ new_tsock->conn_type = msg_nametype(msg);
+ new_tsock->conn_instance = msg_nameinst(msg);
}
/*
@@ -1860,7 +2027,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags)
if (!msg_data_sz(msg)) {
struct msghdr m = {NULL,};
- advance_rx_queue(sk);
+ tsk_advance_rx_queue(sk);
tipc_send_packet(NULL, new_sock, &m, 0);
} else {
__skb_dequeue(&sk->sk_receive_queue);
@@ -1886,9 +2053,8 @@ static int tipc_shutdown(struct socket *sock, int how)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
struct sk_buff *buf;
- u32 peer;
+ u32 dnode;
int res;
if (how != SHUT_RDWR)
@@ -1908,15 +2074,21 @@ restart:
kfree_skb(buf);
goto restart;
}
- tipc_port_disconnect(port->ref);
- if (tipc_msg_reverse(buf, &peer, TIPC_CONN_SHUTDOWN))
- tipc_link_xmit(buf, peer, 0);
+ if (tipc_msg_reverse(buf, &dnode, TIPC_CONN_SHUTDOWN))
+ tipc_link_xmit(buf, dnode, tsk->ref);
+ tipc_node_remove_conn(dnode, tsk->ref);
} else {
- tipc_port_shutdown(port->ref);
+ dnode = tsk_peer_node(tsk);
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE,
+ TIPC_CONN_MSG, SHORT_H_SIZE,
+ 0, dnode, tipc_own_addr,
+ tsk_peer_port(tsk),
+ tsk->ref, TIPC_CONN_SHUTDOWN);
+ tipc_link_xmit(buf, dnode, tsk->ref);
}
-
+ tsk->connected = 0;
sock->state = SS_DISCONNECTING;
-
+ tipc_node_remove_conn(dnode, tsk->ref);
/* fall through */
case SS_DISCONNECTING:
@@ -1937,6 +2109,432 @@ restart:
return res;
}
+static void tipc_sk_timeout(unsigned long ref)
+{
+ struct tipc_sock *tsk;
+ struct sock *sk;
+ struct sk_buff *buf = NULL;
+ u32 peer_port, peer_node;
+
+ tsk = tipc_sk_get(ref);
+ if (!tsk)
+ return;
+
+ sk = &tsk->sk;
+ bh_lock_sock(sk);
+ if (!tsk->connected) {
+ bh_unlock_sock(sk);
+ goto exit;
+ }
+ peer_port = tsk_peer_port(tsk);
+ peer_node = tsk_peer_node(tsk);
+
+ if (tsk->probing_state == TIPC_CONN_PROBING) {
+ /* Previous probe not answered -> self abort */
+ buf = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG,
+ SHORT_H_SIZE, 0, tipc_own_addr,
+ peer_node, ref, peer_port,
+ TIPC_ERR_NO_PORT);
+ } else {
+ buf = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE,
+ 0, peer_node, tipc_own_addr,
+ peer_port, ref, TIPC_OK);
+ tsk->probing_state = TIPC_CONN_PROBING;
+ k_start_timer(&tsk->timer, tsk->probing_interval);
+ }
+ bh_unlock_sock(sk);
+ if (buf)
+ tipc_link_xmit(buf, peer_node, ref);
+exit:
+ tipc_sk_put(tsk);
+}
+
+static int tipc_sk_publish(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq)
+{
+ struct publication *publ;
+ u32 key;
+
+ if (tsk->connected)
+ return -EINVAL;
+ key = tsk->ref + tsk->pub_count + 1;
+ if (key == tsk->ref)
+ return -EADDRINUSE;
+
+ publ = tipc_nametbl_publish(seq->type, seq->lower, seq->upper,
+ scope, tsk->ref, key);
+ if (unlikely(!publ))
+ return -EINVAL;
+
+ list_add(&publ->pport_list, &tsk->publications);
+ tsk->pub_count++;
+ tsk->published = 1;
+ return 0;
+}
+
+static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope,
+ struct tipc_name_seq const *seq)
+{
+ struct publication *publ;
+ struct publication *safe;
+ int rc = -EINVAL;
+
+ list_for_each_entry_safe(publ, safe, &tsk->publications, pport_list) {
+ if (seq) {
+ if (publ->scope != scope)
+ continue;
+ if (publ->type != seq->type)
+ continue;
+ if (publ->lower != seq->lower)
+ continue;
+ if (publ->upper != seq->upper)
+ break;
+ tipc_nametbl_withdraw(publ->type, publ->lower,
+ publ->ref, publ->key);
+ rc = 0;
+ break;
+ }
+ tipc_nametbl_withdraw(publ->type, publ->lower,
+ publ->ref, publ->key);
+ rc = 0;
+ }
+ if (list_empty(&tsk->publications))
+ tsk->published = 0;
+ return rc;
+}
+
+static int tipc_sk_show(struct tipc_sock *tsk, char *buf,
+ int len, int full_id)
+{
+ struct publication *publ;
+ int ret;
+
+ if (full_id)
+ ret = tipc_snprintf(buf, len, "<%u.%u.%u:%u>:",
+ tipc_zone(tipc_own_addr),
+ tipc_cluster(tipc_own_addr),
+ tipc_node(tipc_own_addr), tsk->ref);
+ else
+ ret = tipc_snprintf(buf, len, "%-10u:", tsk->ref);
+
+ if (tsk->connected) {
+ u32 dport = tsk_peer_port(tsk);
+ u32 destnode = tsk_peer_node(tsk);
+
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " connected to <%u.%u.%u:%u>",
+ tipc_zone(destnode),
+ tipc_cluster(destnode),
+ tipc_node(destnode), dport);
+ if (tsk->conn_type != 0)
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " via {%u,%u}", tsk->conn_type,
+ tsk->conn_instance);
+ } else if (tsk->published) {
+ ret += tipc_snprintf(buf + ret, len - ret, " bound to");
+ list_for_each_entry(publ, &tsk->publications, pport_list) {
+ if (publ->lower == publ->upper)
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " {%u,%u}", publ->type,
+ publ->lower);
+ else
+ ret += tipc_snprintf(buf + ret, len - ret,
+ " {%u,%u,%u}", publ->type,
+ publ->lower, publ->upper);
+ }
+ }
+ ret += tipc_snprintf(buf + ret, len - ret, "\n");
+ return ret;
+}
+
+struct sk_buff *tipc_sk_socks_show(void)
+{
+ struct sk_buff *buf;
+ struct tlv_desc *rep_tlv;
+ char *pb;
+ int pb_len;
+ struct tipc_sock *tsk;
+ int str_len = 0;
+ u32 ref = 0;
+
+ buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN));
+ if (!buf)
+ return NULL;
+ rep_tlv = (struct tlv_desc *)buf->data;
+ pb = TLV_DATA(rep_tlv);
+ pb_len = ULTRA_STRING_MAX_LEN;
+
+ tsk = tipc_sk_get_next(&ref);
+ for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+ lock_sock(&tsk->sk);
+ str_len += tipc_sk_show(tsk, pb + str_len,
+ pb_len - str_len, 0);
+ release_sock(&tsk->sk);
+ tipc_sk_put(tsk);
+ }
+ str_len += 1; /* for "\0" */
+ skb_put(buf, TLV_SPACE(str_len));
+ TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+ return buf;
+}
+
+/* tipc_sk_reinit: set non-zero address in all existing sockets
+ * when we go from standalone to network mode.
+ */
+void tipc_sk_reinit(void)
+{
+ struct tipc_msg *msg;
+ u32 ref = 0;
+ struct tipc_sock *tsk = tipc_sk_get_next(&ref);
+
+ for (; tsk; tsk = tipc_sk_get_next(&ref)) {
+ lock_sock(&tsk->sk);
+ msg = &tsk->phdr;
+ msg_set_prevnode(msg, tipc_own_addr);
+ msg_set_orignode(msg, tipc_own_addr);
+ release_sock(&tsk->sk);
+ tipc_sk_put(tsk);
+ }
+}
+
+/**
+ * struct reference - TIPC socket reference entry
+ * @tsk: pointer to socket associated with reference entry
+ * @ref: reference value for socket (combines instance & array index info)
+ */
+struct reference {
+ struct tipc_sock *tsk;
+ u32 ref;
+};
+
+/**
+ * struct tipc_ref_table - table of TIPC socket reference entries
+ * @entries: pointer to array of reference entries
+ * @capacity: array index of first unusable entry
+ * @init_point: array index of first uninitialized entry
+ * @first_free: array index of first unused socket reference entry
+ * @last_free: array index of last unused socket reference entry
+ * @index_mask: bitmask for array index portion of reference values
+ * @start_mask: initial value for instance value portion of reference values
+ */
+struct ref_table {
+ struct reference *entries;
+ u32 capacity;
+ u32 init_point;
+ u32 first_free;
+ u32 last_free;
+ u32 index_mask;
+ u32 start_mask;
+};
+
+/* Socket reference table consists of 2**N entries.
+ *
+ * State Socket ptr Reference
+ * ----- ---------- ---------
+ * In use non-NULL XXXX|own index
+ * (XXXX changes each time entry is acquired)
+ * Free NULL YYYY|next free index
+ * (YYYY is one more than last used XXXX)
+ * Uninitialized NULL 0
+ *
+ * Entry 0 is not used; this allows index 0 to denote the end of the free list.
+ *
+ * Note that a reference value of 0 does not necessarily indicate that an
+ * entry is uninitialized, since the last entry in the free list could also
+ * have a reference value of 0 (although this is unlikely).
+ */
+
+static struct ref_table tipc_ref_table;
+
+static DEFINE_RWLOCK(ref_table_lock);
+
+/**
+ * tipc_ref_table_init - create reference table for sockets
+ */
+int tipc_sk_ref_table_init(u32 req_sz, u32 start)
+{
+ struct reference *table;
+ u32 actual_sz;
+
+ /* account for unused entry, then round up size to a power of 2 */
+
+ req_sz++;
+ for (actual_sz = 16; actual_sz < req_sz; actual_sz <<= 1) {
+ /* do nothing */
+ };
+
+ /* allocate table & mark all entries as uninitialized */
+ table = vzalloc(actual_sz * sizeof(struct reference));
+ if (table == NULL)
+ return -ENOMEM;
+
+ tipc_ref_table.entries = table;
+ tipc_ref_table.capacity = req_sz;
+ tipc_ref_table.init_point = 1;
+ tipc_ref_table.first_free = 0;
+ tipc_ref_table.last_free = 0;
+ tipc_ref_table.index_mask = actual_sz - 1;
+ tipc_ref_table.start_mask = start & ~tipc_ref_table.index_mask;
+
+ return 0;
+}
+
+/**
+ * tipc_ref_table_stop - destroy reference table for sockets
+ */
+void tipc_sk_ref_table_stop(void)
+{
+ if (!tipc_ref_table.entries)
+ return;
+ vfree(tipc_ref_table.entries);
+ tipc_ref_table.entries = NULL;
+}
+
+/* tipc_ref_acquire - create reference to a socket
+ *
+ * Register an socket pointer in the reference table.
+ * Returns a unique reference value that is used from then on to retrieve the
+ * socket pointer, or to determine if the socket has been deregistered.
+ */
+u32 tipc_sk_ref_acquire(struct tipc_sock *tsk)
+{
+ u32 index;
+ u32 index_mask;
+ u32 next_plus_upper;
+ u32 ref = 0;
+ struct reference *entry;
+
+ if (unlikely(!tsk)) {
+ pr_err("Attempt to acquire ref. to non-existent obj\n");
+ return 0;
+ }
+ if (unlikely(!tipc_ref_table.entries)) {
+ pr_err("Ref. table not found in acquisition attempt\n");
+ return 0;
+ }
+
+ /* Take a free entry, if available; otherwise initialize a new one */
+ write_lock_bh(&ref_table_lock);
+ index = tipc_ref_table.first_free;
+ entry = &tipc_ref_table.entries[index];
+
+ if (likely(index)) {
+ index = tipc_ref_table.first_free;
+ entry = &tipc_ref_table.entries[index];
+ index_mask = tipc_ref_table.index_mask;
+ next_plus_upper = entry->ref;
+ tipc_ref_table.first_free = next_plus_upper & index_mask;
+ ref = (next_plus_upper & ~index_mask) + index;
+ entry->tsk = tsk;
+ } else if (tipc_ref_table.init_point < tipc_ref_table.capacity) {
+ index = tipc_ref_table.init_point++;
+ entry = &tipc_ref_table.entries[index];
+ ref = tipc_ref_table.start_mask + index;
+ }
+
+ if (ref) {
+ entry->ref = ref;
+ entry->tsk = tsk;
+ }
+ write_unlock_bh(&ref_table_lock);
+ return ref;
+}
+
+/* tipc_sk_ref_discard - invalidate reference to an socket
+ *
+ * Disallow future references to an socket and free up the entry for re-use.
+ */
+void tipc_sk_ref_discard(u32 ref)
+{
+ struct reference *entry;
+ u32 index;
+ u32 index_mask;
+
+ if (unlikely(!tipc_ref_table.entries)) {
+ pr_err("Ref. table not found during discard attempt\n");
+ return;
+ }
+
+ index_mask = tipc_ref_table.index_mask;
+ index = ref & index_mask;
+ entry = &tipc_ref_table.entries[index];
+
+ write_lock_bh(&ref_table_lock);
+
+ if (unlikely(!entry->tsk)) {
+ pr_err("Attempt to discard ref. to non-existent socket\n");
+ goto exit;
+ }
+ if (unlikely(entry->ref != ref)) {
+ pr_err("Attempt to discard non-existent reference\n");
+ goto exit;
+ }
+
+ /* Mark entry as unused; increment instance part of entry's
+ * reference to invalidate any subsequent references
+ */
+
+ entry->tsk = NULL;
+ entry->ref = (ref & ~index_mask) + (index_mask + 1);
+
+ /* Append entry to free entry list */
+ if (unlikely(tipc_ref_table.first_free == 0))
+ tipc_ref_table.first_free = index;
+ else
+ tipc_ref_table.entries[tipc_ref_table.last_free].ref |= index;
+ tipc_ref_table.last_free = index;
+exit:
+ write_unlock_bh(&ref_table_lock);
+}
+
+/* tipc_sk_get - find referenced socket and return pointer to it
+ */
+struct tipc_sock *tipc_sk_get(u32 ref)
+{
+ struct reference *entry;
+ struct tipc_sock *tsk;
+
+ if (unlikely(!tipc_ref_table.entries))
+ return NULL;
+ read_lock_bh(&ref_table_lock);
+ entry = &tipc_ref_table.entries[ref & tipc_ref_table.index_mask];
+ tsk = entry->tsk;
+ if (likely(tsk && (entry->ref == ref)))
+ sock_hold(&tsk->sk);
+ else
+ tsk = NULL;
+ read_unlock_bh(&ref_table_lock);
+ return tsk;
+}
+
+/* tipc_sk_get_next - lock & return next socket after referenced one
+*/
+struct tipc_sock *tipc_sk_get_next(u32 *ref)
+{
+ struct reference *entry;
+ struct tipc_sock *tsk = NULL;
+ uint index = *ref & tipc_ref_table.index_mask;
+
+ read_lock_bh(&ref_table_lock);
+ while (++index < tipc_ref_table.capacity) {
+ entry = &tipc_ref_table.entries[index];
+ if (!entry->tsk)
+ continue;
+ tsk = entry->tsk;
+ sock_hold(&tsk->sk);
+ *ref = entry->ref;
+ break;
+ }
+ read_unlock_bh(&ref_table_lock);
+ return tsk;
+}
+
+static void tipc_sk_put(struct tipc_sock *tsk)
+{
+ sock_put(&tsk->sk);
+}
+
/**
* tipc_setsockopt - set socket option
* @sock: socket structure
@@ -1955,7 +2553,6 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
u32 value;
int res;
@@ -1973,16 +2570,16 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- res = tipc_port_set_importance(port, value);
+ res = tsk_set_importance(tsk, value);
break;
case TIPC_SRC_DROPPABLE:
if (sock->type != SOCK_STREAM)
- tipc_port_set_unreliable(port, value);
+ tsk_set_unreliable(tsk, value);
else
res = -ENOPROTOOPT;
break;
case TIPC_DEST_DROPPABLE:
- tipc_port_set_unreturnable(port, value);
+ tsk_set_unreturnable(tsk, value);
break;
case TIPC_CONN_TIMEOUT:
tipc_sk(sk)->conn_timeout = value;
@@ -2015,7 +2612,6 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
- struct tipc_port *port = &tsk->port;
int len;
u32 value;
int res;
@@ -2032,16 +2628,16 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt,
switch (opt) {
case TIPC_IMPORTANCE:
- value = tipc_port_importance(port);
+ value = tsk_importance(tsk);
break;
case TIPC_SRC_DROPPABLE:
- value = tipc_port_unreliable(port);
+ value = tsk_unreliable(tsk);
break;
case TIPC_DEST_DROPPABLE:
- value = tipc_port_unreturnable(port);
+ value = tsk_unreturnable(tsk);
break;
case TIPC_CONN_TIMEOUT:
- value = tipc_sk(sk)->conn_timeout;
+ value = tsk->conn_timeout;
/* no need to set "res", since already 0 at this point */
break;
case TIPC_NODE_RECVQ_DEPTH: