summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c6
-rw-r--r--net/9p/trans_fd.c2
-rw-r--r--net/9p/trans_virtio.c4
-rw-r--r--net/9p/trans_xen.c4
-rw-r--r--net/batman-adv/log.c2
-rw-r--r--net/bridge/br_multicast.c4
-rw-r--r--net/caif/chnl_net.c19
-rw-r--r--net/core/dev.c16
-rw-r--r--net/core/netclassid_cgroup.c7
-rw-r--r--net/core/netprio_cgroup.c10
-rw-r--r--net/core/pktgen.c1
-rw-r--r--net/core/skbuff.c2
-rw-r--r--net/core/sock.c37
-rw-r--r--net/dccp/minisocks.c2
-rw-r--r--net/dsa/dsa.c5
-rw-r--r--net/dsa/dsa2.c160
-rw-r--r--net/dsa/dsa_priv.h1
-rw-r--r--net/dsa/slave.c12
-rw-r--r--net/dsa/tag_ocelot.c2
-rw-r--r--net/dsa/tag_ocelot_8021q.c2
-rw-r--r--net/dsa/tag_rtl4_a.c7
-rw-r--r--net/ipv4/cipso_ipv4.c18
-rw-r--r--net/ipv4/ip_gre.c9
-rw-r--r--net/ipv4/nexthop.c23
-rw-r--r--net/ipv4/tcp_input.c2
-rw-r--r--net/ipv4/udp_tunnel_nic.c2
-rw-r--r--net/ipv6/addrconf.c28
-rw-r--r--net/ipv6/ip6_fib.c3
-rw-r--r--net/ipv6/ip6_gre.c2
-rw-r--r--net/ipv6/mcast.c10
-rw-r--r--net/ipv6/netfilter/nf_socket_ipv6.c4
-rw-r--r--net/ipv6/seg6_iptunnel.c2
-rw-r--r--net/l2tp/l2tp_core.c4
-rw-r--r--net/mac802154/iface.c2
-rw-r--r--net/mctp/route.c2
-rw-r--r--net/mptcp/pm_netlink.c10
-rw-r--r--net/mptcp/protocol.c97
-rw-r--r--net/mptcp/protocol.h3
-rw-r--r--net/ncsi/internal.h3
-rw-r--r--net/ncsi/ncsi-manage.c25
-rw-r--r--net/ncsi/ncsi-pkt.h6
-rw-r--r--net/ncsi/ncsi-rsp.c42
-rw-r--r--net/netfilter/nf_conntrack_core.c103
-rw-r--r--net/netfilter/nf_conntrack_expect.c25
-rw-r--r--net/netfilter/nf_conntrack_netlink.c4
-rw-r--r--net/netfilter/nf_conntrack_standalone.c4
-rw-r--r--net/netfilter/nf_nat_core.c18
-rw-r--r--net/netfilter/nft_ct.c9
-rw-r--r--net/packet/af_packet.c2
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/sched/sch_fq_codel.c12
-rw-r--r--net/smc/smc_clc.c3
-rw-r--r--net/smc/smc_core.c2
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_upcall.c2
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c49
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/clnt.c66
-rw-r--r--net/sunrpc/debugfs.c2
-rw-r--r--net/sunrpc/rpc_pipe.c2
-rw-r--r--net/sunrpc/svc.c39
-rw-r--r--net/sunrpc/svc_xprt.c13
-rw-r--r--net/sunrpc/svcauth.c8
-rw-r--r--net/sunrpc/svcauth_unix.c18
-rw-r--r--net/sunrpc/sysfs.c36
-rw-r--r--net/sunrpc/xprt.c32
-rw-r--r--net/sunrpc/xprtmultipath.c1
-rw-r--r--net/sunrpc/xprtrdma/backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c14
-rw-r--r--net/sunrpc/xprtrdma/transport.c13
-rw-r--r--net/sunrpc/xprtrdma/verbs.c28
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h2
-rw-r--r--net/sunrpc/xprtsock.c15
-rw-r--r--net/tipc/socket.c4
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/af_vsock.c10
-rw-r--r--net/vmw_vsock/virtio_transport_common.c23
76 files changed, 753 insertions, 416 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index b7b958f61faf..213f12ed76cd 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -30,6 +30,8 @@
#define CREATE_TRACE_POINTS
#include <trace/events/9p.h>
+#define DEFAULT_MSIZE (128 * 1024)
+
/*
* Client Option Parsing (code inspired by NFS code)
* - a little lazy - parse all client options
@@ -65,7 +67,7 @@ EXPORT_SYMBOL(p9_is_proto_dotu);
int p9_show_client_options(struct seq_file *m, struct p9_client *clnt)
{
- if (clnt->msize != 8192)
+ if (clnt->msize != DEFAULT_MSIZE)
seq_printf(m, ",msize=%u", clnt->msize);
seq_printf(m, ",trans=%s", clnt->trans_mod->name);
@@ -139,7 +141,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
int ret = 0;
clnt->proto_version = p9_proto_2000L;
- clnt->msize = 8192;
+ clnt->msize = DEFAULT_MSIZE;
if (!opts)
return 0;
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f4dd0456beaf..007bbcc68010 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -34,7 +34,7 @@
#include <linux/syscalls.h> /* killme */
#define P9_PORT 564
-#define MAX_SOCK_BUF (64*1024)
+#define MAX_SOCK_BUF (1024*1024)
#define MAXPOLLWADDR 2
static struct p9_trans_module p9_tcp_trans;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 2bbd7dce0f1d..490a4c900339 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -610,7 +610,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL);
if (!chan->vc_wq) {
err = -ENOMEM;
- goto out_free_tag;
+ goto out_remove_file;
}
init_waitqueue_head(chan->vc_wq);
chan->ring_bufs_avail = 1;
@@ -628,6 +628,8 @@ static int p9_virtio_probe(struct virtio_device *vdev)
return 0;
+out_remove_file:
+ sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr);
out_free_tag:
kfree(tag);
out_free_vq:
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index f4fea28e05da..3ec1a51a6944 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size)
static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
{
- struct xen_9pfs_front_priv *priv = NULL;
+ struct xen_9pfs_front_priv *priv;
RING_IDX cons, prod, masked_cons, masked_prod;
unsigned long flags;
u32 size = p9_req->tc.size;
@@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req)
break;
}
read_unlock(&xen_9pfs_lock);
- if (!priv || priv->client != client)
+ if (list_entry_is_head(priv, &xen_9pfs_devs, list))
return -EINVAL;
num = p9_req->tc.tag % priv->num_rings;
diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c
index f0e5d1429662..7a93a1e94c40 100644
--- a/net/batman-adv/log.c
+++ b/net/batman-adv/log.c
@@ -7,7 +7,7 @@
#include "log.h"
#include "main.h"
-#include <stdarg.h>
+#include <linux/stdarg.h>
#include "trace.h"
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 9231617a16e4..3523c8c7068f 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -4255,7 +4255,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
bool del = false;
brmctx = br_multicast_port_ctx_get_global(pmctx);
- spin_lock(&brmctx->br->multicast_lock);
+ spin_lock_bh(&brmctx->br->multicast_lock);
if (pmctx->multicast_router == val) {
/* Refresh the temp router port timer */
if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) {
@@ -4305,7 +4305,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx,
}
err = 0;
unlock:
- spin_unlock(&brmctx->br->multicast_lock);
+ spin_unlock_bh(&brmctx->br->multicast_lock);
return err;
}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 37b67194c0df..414dc5671c45 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -53,20 +53,6 @@ struct chnl_net {
enum caif_states state;
};
-static void robust_list_del(struct list_head *delete_node)
-{
- struct list_head *list_node;
- struct list_head *n;
- ASSERT_RTNL();
- list_for_each_safe(list_node, n, &chnl_net_list) {
- if (list_node == delete_node) {
- list_del(list_node);
- return;
- }
- }
- WARN_ON(1);
-}
-
static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt)
{
struct sk_buff *skb;
@@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev)
ASSERT_RTNL();
priv = netdev_priv(dev);
strncpy(priv->name, dev->name, sizeof(priv->name));
+ INIT_LIST_HEAD(&priv->list_field);
return 0;
}
@@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev)
struct chnl_net *priv;
ASSERT_RTNL();
priv = netdev_priv(dev);
- robust_list_del(&priv->list_field);
+ list_del_init(&priv->list_field);
}
static const struct net_device_ops netdev_ops = {
@@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void)
rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
- list_del(list_node);
+ list_del_init(list_node);
delete_device(dev);
}
rtnl_unlock();
diff --git a/net/core/dev.c b/net/core/dev.c
index 74fd402d26dd..7ee9fecd3aff 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6923,12 +6923,16 @@ EXPORT_SYMBOL(napi_disable);
*/
void napi_enable(struct napi_struct *n)
{
- BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
- smp_mb__before_atomic();
- clear_bit(NAPI_STATE_SCHED, &n->state);
- clear_bit(NAPI_STATE_NPSVC, &n->state);
- if (n->dev->threaded && n->thread)
- set_bit(NAPI_STATE_THREADED, &n->state);
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ BUG_ON(!test_bit(NAPI_STATE_SCHED, &val));
+
+ new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC);
+ if (n->dev->threaded && n->thread)
+ new |= NAPIF_STATE_THREADED;
+ } while (cmpxchg(&n->state, val, new) != val);
}
EXPORT_SYMBOL(napi_enable);
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index b49c57d35a88..1a6a86693b74 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n)
struct update_classid_context *ctx = (void *)v;
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+ if (sock)
sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid);
- spin_unlock(&cgroup_sk_update_lock);
- }
if (--ctx->batch == 0) {
ctx->batch = UPDATE_CLASSID_BATCH;
return n + 1;
@@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft,
struct css_task_iter it;
struct task_struct *p;
- cgroup_sk_alloc_disable();
-
cs->classid = (u32)value;
css_task_iter_start(css, 0, &it);
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 99a431c56f23..8456dfbe2eb4 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
if (!dev)
return -ENODEV;
- cgroup_sk_alloc_disable();
-
rtnl_lock();
ret = netprio_set_prio(of_css(of), dev, prio);
@@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of,
static int update_netprio(const void *v, struct file *file, unsigned n)
{
struct socket *sock = sock_from_file(file);
- if (sock) {
- spin_lock(&cgroup_sk_update_lock);
+
+ if (sock)
sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data,
(unsigned long)v);
- spin_unlock(&cgroup_sk_update_lock);
- }
return 0;
}
@@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset)
struct task_struct *p;
struct cgroup_subsys_state *css;
- cgroup_sk_alloc_disable();
-
cgroup_taskset_for_each(p, css, tset) {
void *v = (void *)(unsigned long)css->id;
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9e5a3249373c..a3d74e2704c4 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3602,7 +3602,6 @@ out:
static int pktgen_thread_worker(void *arg)
{
- DEFINE_WAIT(wait);
struct pktgen_thread *t = arg;
struct pktgen_dev *pkt_dev = NULL;
int cpu = t->cpu;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f9311762cc47..2170bea2c7de 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3884,7 +3884,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
- __copy_skb_header(nskb, skb);
+ __copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..512e629f9780 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
if (sk->sk_lock.owned)
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
}
EXPORT_SYMBOL(lock_sock_nested);
@@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock);
*/
bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock)
{
+ /* The sk_lock has mutex_lock() semantics here. */
+ mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
+
might_sleep();
spin_lock_bh(&sk->sk_lock.slock);
- if (!sk->sk_lock.owned)
+ if (!sk->sk_lock.owned) {
/*
- * Note : We must disable BH
+ * Fast path return with bottom halves disabled and
+ * sock::sk_lock.slock held.
+ *
+ * The 'mutex' is not contended and holding
+ * sock::sk_lock.slock prevents all other lockers to
+ * proceed so the corresponding unlock_sock_fast() can
+ * avoid the slow path of release_sock() completely and
+ * just release slock.
+ *
+ * From a semantical POV this is equivalent to 'acquiring'
+ * the 'mutex', hence the corresponding lockdep
+ * mutex_release() has to happen in the fast path of
+ * unlock_sock_fast().
*/
return false;
+ }
__lock_sock(sk);
sk->sk_lock.owned = 1;
- spin_unlock(&sk->sk_lock.slock);
- /*
- * The sk_lock has mutex_lock() semantics here:
- */
- mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
__acquire(&sk->sk_lock.slock);
- local_bh_enable();
+ spin_unlock_bh(&sk->sk_lock.slock);
return true;
}
EXPORT_SYMBOL(lock_sock_fast);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index c5c74a34d139..91e7a2202697 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackvec = NULL;
newdp->dccps_service_list = NULL;
+ newdp->dccps_hc_rx_ccid = NULL;
+ newdp->dccps_hc_tx_ccid = NULL;
newdp->dccps_service = dreq->dreq_service;
newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo;
newdp->dccps_timestamp_time = dreq->dreq_timestamp_time;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 1dc45e40f961..41f36ad8b0ec 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work)
return queue_work(dsa_owq, work);
}
+void dsa_flush_workqueue(void)
+{
+ flush_workqueue(dsa_owq);
+}
+
int dsa_devlink_param_get(struct devlink *dl, u32 id,
struct devlink_param_gset_ctx *ctx)
{
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 1b2b25d7bd02..b29262eee00b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
bool dsa_port_link_registered = false;
+ struct dsa_switch *ds = dp->ds;
bool dsa_port_enabled = false;
int err = 0;
@@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp)
INIT_LIST_HEAD(&dp->fdbs);
INIT_LIST_HEAD(&dp->mdbs);
+ if (ds->ops->port_setup) {
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
switch (dp->type) {
case DSA_PORT_TYPE_UNUSED:
dsa_port_disable(dp);
@@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp)
dsa_port_disable(dp);
if (err && dsa_port_link_registered)
dsa_port_link_unregister_of(dp);
- if (err)
+ if (err) {
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
return err;
+ }
dp->setup = true;
@@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp)
static void dsa_port_teardown(struct dsa_port *dp)
{
struct devlink_port *dlp = &dp->devlink_port;
+ struct dsa_switch *ds = dp->ds;
struct dsa_mac_addr *a, *tmp;
if (!dp->setup)
return;
+ if (ds->ops->port_teardown)
+ ds->ops->port_teardown(ds, dp->index);
+
devlink_port_type_clear(dlp);
switch (dp->type) {
@@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp)
dp->devlink_port_setup = false;
}
+/* Destroy the current devlink port, and create a new one which has the UNUSED
+ * flavour. At this point, any call to ds->ops->port_setup has been already
+ * balanced out by a call to ds->ops->port_teardown, so we know that any
+ * devlink port regions the driver had are now unregistered. We then call its
+ * ds->ops->port_setup again, in order for the driver to re-create them on the
+ * new devlink port.
+ */
+static int dsa_port_reinit_as_unused(struct dsa_port *dp)
+{
+ struct dsa_switch *ds = dp->ds;
+ int err;
+
+ dsa_port_devlink_teardown(dp);
+ dp->type = DSA_PORT_TYPE_UNUSED;
+ err = dsa_port_devlink_setup(dp);
+ if (err)
+ return err;
+
+ if (ds->ops->port_setup) {
+ /* On error, leave the devlink port registered,
+ * dsa_switch_teardown will clean it up later.
+ */
+ err = ds->ops->port_setup(ds, dp->index);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
static int dsa_devlink_info_get(struct devlink *dl,
struct devlink_info_req *req,
struct netlink_ext_ack *extack)
@@ -836,7 +880,7 @@ static int dsa_switch_setup(struct dsa_switch *ds)
devlink_params_publish(ds->devlink);
if (!ds->slave_mii_bus && ds->ops->phy_read) {
- ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev);
+ ds->slave_mii_bus = mdiobus_alloc();
if (!ds->slave_mii_bus) {
err = -ENOMEM;
goto teardown;
@@ -846,13 +890,16 @@ static int dsa_switch_setup(struct dsa_switch *ds)
err = mdiobus_register(ds->slave_mii_bus);
if (err < 0)
- goto teardown;
+ goto free_slave_mii_bus;
}
ds->setup = true;
return 0;
+free_slave_mii_bus:
+ if (ds->slave_mii_bus && ds->ops->phy_read)
+ mdiobus_free(ds->slave_mii_bus);
teardown:
if (ds->ops->teardown)
ds->ops->teardown(ds);
@@ -877,8 +924,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
if (!ds->setup)
return;
- if (ds->slave_mii_bus && ds->ops->phy_read)
+ if (ds->slave_mii_bus && ds->ops->phy_read) {
mdiobus_unregister(ds->slave_mii_bus);
+ mdiobus_free(ds->slave_mii_bus);
+ ds->slave_mii_bus = NULL;
+ }
dsa_switch_unregister_notifier(ds);
@@ -897,6 +947,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds)
ds->setup = false;
}
+/* First tear down the non-shared, then the shared ports. This ensures that
+ * all work items scheduled by our switchdev handlers for user ports have
+ * completed before we destroy the refcounting kept on the shared ports.
+ */
+static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_user(dp) || dsa_port_is_unused(dp))
+ dsa_port_teardown(dp);
+
+ dsa_flush_workqueue();
+
+ list_for_each_entry(dp, &dst->ports, list)
+ if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp))
+ dsa_port_teardown(dp);
+}
+
+static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
+{
+ struct dsa_port *dp;
+
+ list_for_each_entry(dp, &dst->ports, list)
+ dsa_switch_teardown(dp->ds);
+}
+
static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -911,38 +988,22 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst)
list_for_each_entry(dp, &dst->ports, list) {
err = dsa_port_setup(dp);
if (err) {
- dsa_port_devlink_teardown(dp);
- dp->type = DSA_PORT_TYPE_UNUSED;
- err = dsa_port_devlink_setup(dp);
+ err = dsa_port_reinit_as_unused(dp);
if (err)
goto teardown;
- continue;
}
}
return 0;
teardown:
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
+ dsa_tree_teardown_ports(dst);
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
+ dsa_tree_teardown_switches(dst);
return err;
}
-static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst)
-{
- struct dsa_port *dp;
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_port_teardown(dp);
-
- list_for_each_entry(dp, &dst->ports, list)
- dsa_switch_teardown(dp->ds);
-}
-
static int dsa_tree_setup_master(struct dsa_switch_tree *dst)
{
struct dsa_port *dp;
@@ -1034,6 +1095,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst)
teardown_master:
dsa_tree_teardown_master(dst);
teardown_switches:
+ dsa_tree_teardown_ports(dst);
dsa_tree_teardown_switches(dst);
teardown_cpu_ports:
dsa_tree_teardown_cpu_ports(dst);
@@ -1052,6 +1114,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst)
dsa_tree_teardown_master(dst);
+ dsa_tree_teardown_ports(dst);
+
dsa_tree_teardown_switches(dst);
dsa_tree_teardown_cpu_ports(dst);
@@ -1546,3 +1610,53 @@ void dsa_unregister_switch(struct dsa_switch *ds)
mutex_unlock(&dsa2_mutex);
}
EXPORT_SYMBOL_GPL(dsa_unregister_switch);
+
+/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is
+ * blocking that operation from completion, due to the dev_hold taken inside
+ * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of
+ * the DSA master, so that the system can reboot successfully.
+ */
+void dsa_switch_shutdown(struct dsa_switch *ds)
+{
+ struct net_device *master, *slave_dev;
+ LIST_HEAD(unregister_list);
+ struct dsa_port *dp;
+
+ mutex_lock(&dsa2_mutex);
+ rtnl_lock();
+
+ list_for_each_entry(dp, &ds->dst->ports, list) {
+ if (dp->ds != ds)
+ continue;
+
+ if (!dsa_port_is_user(dp))
+ continue;
+
+ master = dp->cpu_dp->master;
+ slave_dev = dp->slave;
+
+ netdev_upper_dev_unlink(master, slave_dev);
+ /* Just unlinking ourselves as uppers of the master is not
+ * sufficient. When the master net device unregisters, that will
+ * also call dev_close, which we will catch as NETDEV_GOING_DOWN
+ * and trigger a dev_close on our own devices (dsa_slave_close).
+ * In turn, that will call dev_mc_unsync on the master's net
+ * device. If the master is also a DSA switch port, this will
+ * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on
+ * its own master. Lockdep will complain about the fact that
+ * all cascaded masters have the same dsa_master_addr_list_lock_key,
+ * which it normally would not do if the cascaded masters would
+ * be in a proper upper/lower relationship, which we've just
+ * destroyed.
+ * To suppress the lockdep warnings, let's actually unregister
+ * the DSA slave interfaces too, to avoid the nonsensical
+ * multicast address list synchronization on shutdown.
+ */
+ unregister_netdevice_queue(slave_dev, &unregister_list);
+ }
+ unregister_netdevice_many(&unregister_list);
+
+ rtnl_unlock();
+ mutex_unlock(&dsa2_mutex);
+}
+EXPORT_SYMBOL_GPL(dsa_switch_shutdown);
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 33ab7d7af9eb..a5c9bc7b66c6 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops);
const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf);
bool dsa_schedule_work(struct work_struct *work);
+void dsa_flush_workqueue(void);
const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops);
static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops)
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 662ff531d4e2..a2bf2d8ac65b 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1854,13 +1854,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
* use the switch internal MDIO bus instead
*/
ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags);
- if (ret) {
- netdev_err(slave_dev,
- "failed to connect to port %d: %d\n",
- dp->index, ret);
- phylink_destroy(dp->pl);
- return ret;
- }
+ }
+ if (ret) {
+ netdev_err(slave_dev, "failed to connect to PHY: %pe\n",
+ ERR_PTR(ret));
+ phylink_destroy(dp->pl);
}
return ret;
diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index d37ab98e7fe1..8025ed778d33 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2019 NXP Semiconductors
+/* Copyright 2019 NXP
*/
#include <linux/dsa/ocelot.h>
#include <soc/mscc/ocelot.h>
diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c
index 3038a257ba05..59072930cb02 100644
--- a/net/dsa/tag_ocelot_8021q.c
+++ b/net/dsa/tag_ocelot_8021q.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
-/* Copyright 2020-2021 NXP Semiconductors
+/* Copyright 2020-2021 NXP
*
* An implementation of the software-defined tag_8021q.c tagger format, which
* also preserves full functionality under a vlan_filtering bridge. It does
diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c
index 40811bab4d09..f920487ae145 100644
--- a/net/dsa/tag_rtl4_a.c
+++ b/net/dsa/tag_rtl4_a.c
@@ -54,9 +54,10 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
p = (__be16 *)tag;
*p = htons(RTL4_A_ETHERTYPE);
- out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
- /* The lower bits is the port number */
- out |= (u8)dp->index;
+ out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8);
+ /* The lower bits indicate the port number */
+ out |= BIT(dp->index);
+
p = (__be16 *)(tag + 2);
*p = htons(out);
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 7fbd0b532f52..099259fc826a 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -465,16 +465,14 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def)
if (!doi_def)
return;
- if (doi_def->map.std) {
- switch (doi_def->type) {
- case CIPSO_V4_MAP_TRANS:
- kfree(doi_def->map.std->lvl.cipso);
- kfree(doi_def->map.std->lvl.local);
- kfree(doi_def->map.std->cat.cipso);
- kfree(doi_def->map.std->cat.local);
- kfree(doi_def->map.std);
- break;
- }
+ switch (doi_def->type) {
+ case CIPSO_V4_MAP_TRANS:
+ kfree(doi_def->map.std->lvl.cipso);
+ kfree(doi_def->map.std->lvl.local);
+ kfree(doi_def->map.std->cat.cipso);
+ kfree(doi_def->map.std->cat.local);
+ kfree(doi_def->map.std);
+ break;
}
kfree(doi_def);
}
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 177d26d8fb9c..0fe6c936dc54 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -473,8 +473,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
- if (csum && skb_checksum_start(skb) < skb->data)
- return -EINVAL;
return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
@@ -632,15 +630,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
}
if (dev->header_ops) {
+ const int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
+ if (pull_len > skb_transport_offset(skb))
+ goto free_skb;
+
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ skb_pull(skb, pull_len);
skb_reset_mac_header(skb);
} else {
if (skb_cow_head(skb, dev->needed_headroom))
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 4075230b14c6..9e8100728d46 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old,
rcu_assign_pointer(old->nh_grp, newg);
if (newg->resilient) {
+ /* Make sure concurrent readers are not using 'oldg' anymore. */
+ synchronize_net();
rcu_assign_pointer(oldg->res_table, tmp_table);
rcu_assign_pointer(oldg->spare->res_table, tmp_table);
}
@@ -2490,6 +2492,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
.fc_gw4 = cfg->gw.ipv4,
.fc_gw_family = cfg->gw.ipv4 ? AF_INET : 0,
.fc_flags = cfg->nh_flags,
+ .fc_nlinfo = cfg->nlinfo,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
};
@@ -2528,6 +2531,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh,
.fc_ifindex = cfg->nh_ifindex,
.fc_gateway = cfg->gw.ipv6,
.fc_flags = cfg->nh_flags,
+ .fc_nlinfo = cfg->nlinfo,
.fc_encap = cfg->nh_encap,
.fc_encap_type = cfg->nh_encap_type,
.fc_is_fdb = cfg->nh_fdb,
@@ -3563,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = {
};
static int nexthops_dump(struct net *net, struct notifier_block *nb,
+ enum nexthop_event_type event_type,
struct netlink_ext_ack *extack)
{
struct rb_root *root = &net->nexthop.rb_root;
@@ -3573,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb,
struct nexthop *nh;
nh = rb_entry(node, struct nexthop, rb_node);
- err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh,
- extack);
+ err = call_nexthop_notifier(nb, net, event_type, nh, extack);
if (err)
break;
}
@@ -3588,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb,
int err;
rtnl_lock();
- err = nexthops_dump(net, nb, extack);
+ err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack);
if (err)
goto unlock;
err = blocking_notifier_chain_register(&net->nexthop.notifier_chain,
@@ -3601,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier);
int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
{
- return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
- nb);
+ int err;
+
+ rtnl_lock();
+ err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain,
+ nb);
+ if (err)
+ goto unlock;
+ nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL);
+unlock:
+ rtnl_unlock();
+ return err;
}
EXPORT_SYMBOL(unregister_nexthop_notifier);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3f7bd7ae7d7a..141e85e6422b 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -1346,7 +1346,7 @@ static u8 tcp_sacktag_one(struct sock *sk,
if (dup_sack && (sacked & TCPCB_RETRANS)) {
if (tp->undo_marker && tp->undo_retrans > 0 &&
after(end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount);
if ((sacked & TCPCB_SACKED_ACKED) &&
before(start_seq, state->reord))
state->reord = start_seq;
diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c
index 0d122edc368d..b91003538d87 100644
--- a/net/ipv4/udp_tunnel_nic.c
+++ b/net/ipv4/udp_tunnel_nic.c
@@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void)
{
int err;
- udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0);
+ udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0);
if (!udp_tunnel_nic_workqueue)
return -ENOMEM;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 17756f3ed33b..c6a90b7bbb70 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3092,19 +3092,22 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
}
}
-#if IS_ENABLED(CONFIG_IPV6_SIT)
-static void sit_add_v4_addrs(struct inet6_dev *idev)
+#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+static void add_v4_addrs(struct inet6_dev *idev)
{
struct in6_addr addr;
struct net_device *dev;
struct net *net = dev_net(idev->dev);
- int scope, plen;
+ int scope, plen, offset = 0;
u32 pflags = 0;
ASSERT_RTNL();
memset(&addr, 0, sizeof(struct in6_addr));
- memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4);
+ /* in case of IP6GRE the dev_addr is an IPv6 and therefore we use only the last 4 bytes */
+ if (idev->dev->addr_len == sizeof(struct in6_addr))
+ offset = sizeof(struct in6_addr) - 4;
+ memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
if (idev->dev->flags&IFF_POINTOPOINT) {
addr.s6_addr32[0] = htonl(0xfe800000);
@@ -3342,8 +3345,6 @@ static void addrconf_dev_config(struct net_device *dev)
(dev->type != ARPHRD_IEEE1394) &&
(dev->type != ARPHRD_TUNNEL6) &&
(dev->type != ARPHRD_6LOWPAN) &&
- (dev->type != ARPHRD_IP6GRE) &&
- (dev->type != ARPHRD_IPGRE) &&
(dev->type != ARPHRD_TUNNEL) &&
(dev->type != ARPHRD_NONE) &&
(dev->type != ARPHRD_RAWIP)) {
@@ -3391,14 +3392,14 @@ static void addrconf_sit_config(struct net_device *dev)
return;
}
- sit_add_v4_addrs(idev);
+ add_v4_addrs(idev);
if (dev->flags&IFF_POINTOPOINT)
addrconf_add_mroute(dev);
}
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
static void addrconf_gre_config(struct net_device *dev)
{
struct inet6_dev *idev;
@@ -3411,7 +3412,13 @@ static void addrconf_gre_config(struct net_device *dev)
return;
}
- addrconf_addr_gen(idev, true);
+ if (dev->type == ARPHRD_ETHER) {
+ addrconf_addr_gen(idev, true);
+ return;
+ }
+
+ add_v4_addrs(idev);
+
if (dev->flags & IFF_POINTOPOINT)
addrconf_add_mroute(dev);
}
@@ -3587,7 +3594,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
addrconf_sit_config(dev);
break;
#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE)
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
case ARPHRD_IPGRE:
addrconf_gre_config(dev);
break;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 1bec5b22f80d..0371d2c14145 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
int err = -ENOMEM;
int allow_create = 1;
int replace_required = 0;
- int sernum = fib6_new_sernum(info->nl_net);
if (info->nlh) {
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt,
if (!err) {
if (rt->nh)
list_add(&rt->nh_list, &rt->nh->f6i_list);
- __fib6_update_sernum_upto_root(rt, sernum);
+ __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net));
fib6_start_gc(info->nl_net, rt);
}
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7baf41d160f5..3ad201d372d8 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -629,8 +629,6 @@ drop:
static int gre_handle_offloads(struct sk_buff *skb, bool csum)
{
- if (csum && skb_checksum_start(skb) < skb->data)
- return -EINVAL;
return iptunnel_handle_offloads(skb,
csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index cd951faa2fac..bed8155508c8 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1356,8 +1356,8 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld,
return 0;
}
-static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
- unsigned long *max_delay)
+static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
+ unsigned long *max_delay)
{
*max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL);
@@ -1367,7 +1367,7 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld,
idev->mc_maxdelay = *max_delay;
- return 0;
+ return;
}
/* called with rcu_read_lock() */
@@ -1454,9 +1454,7 @@ static void __mld_query_work(struct sk_buff *skb)
mlh2 = (struct mld2_query *)skb_transport_header(skb);
- err = mld_process_v2(idev, mlh2, &max_delay);
- if (err < 0)
- goto out;
+ mld_process_v2(idev, mlh2, &max_delay);
if (group_type == IPV6_ADDR_ANY) { /* general query */
if (mlh2->mld2q_nsrcs)
diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c
index 6fd54744cbc3..aa5bb8789ba0 100644
--- a/net/ipv6/netfilter/nf_socket_ipv6.c
+++ b/net/ipv6/netfilter/nf_socket_ipv6.c
@@ -99,7 +99,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
{
__be16 dport, sport;
const struct in6_addr *daddr = NULL, *saddr = NULL;
- struct ipv6hdr *iph = ipv6_hdr(skb);
+ struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var;
struct sk_buff *data_skb = NULL;
int doff = 0;
int thoff = 0, tproto;
@@ -129,8 +129,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb,
thoff + sizeof(*hp);
} else if (tproto == IPPROTO_ICMPV6) {
- struct ipv6hdr ipv6_var;
-
if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr,
&sport, &dport, &ipv6_var))
return NULL;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 1bf5f5ae75ac..3adc5d9211ad 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -385,7 +385,7 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
struct seg6_lwt *slwt;
- int err = -EINVAL;
+ int err;
err = seg6_do_srh(skb);
if (unlikely(err))
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 53486b162f01..93271a2632b8 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
}
if (tunnel->version == L2TP_HDR_VER_3 &&
- l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
+ l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) {
+ l2tp_session_dec_refcount(session);
goto invalid;
+ }
l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
l2tp_session_dec_refcount(session);
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index 1cf5ac09edcb..323d3d2d986f 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -617,7 +617,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name,
{
struct net_device *ndev = NULL;
struct ieee802154_sub_if_data *sdata = NULL;
- int ret = -ENOMEM;
+ int ret;
ASSERT_RTNL();
diff --git a/net/mctp/route.c b/net/mctp/route.c
index 5265525011ad..5ca186d53cb0 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -1083,8 +1083,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net)
{
struct mctp_route *rt;
+ rcu_read_lock();
list_for_each_entry_rcu(rt, &net->mctp.routes, list)
mctp_route_release(rt);
+ rcu_read_unlock();
}
static struct pernet_operations mctp_net_ops = {
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 1e4289c507ff..c4f9a5ce3815 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -644,15 +644,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk)
subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node);
if (subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow;
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for %s",
mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr");
- slow = lock_sock_fast(ssk);
- tcp_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
}
}
@@ -669,7 +666,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
struct sock *sk = (struct sock *)msk;
struct mptcp_addr_info local;
- bool slow;
local_address((struct sock_common *)ssk, &local);
if (!addresses_equal(&local, addr, addr->port))
@@ -682,9 +678,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- slow = lock_sock_fast(ssk);
- tcp_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
+ mptcp_subflow_send_ack(ssk);
spin_lock_bh(&msk->pm.lock);
return 0;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ade648c3512b..dbcebf56798f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -440,19 +440,22 @@ static bool tcp_can_send_ack(const struct sock *ssk)
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
+void mptcp_subflow_send_ack(struct sock *ssk)
+{
+ bool slow;
+
+ slow = lock_sock_fast(ssk);
+ if (tcp_can_send_ack(ssk))
+ tcp_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
+}
+
static void mptcp_send_ack(struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- mptcp_for_each_subflow(msk, subflow) {
- struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- bool slow;
-
- slow = lock_sock_fast(ssk);
- if (tcp_can_send_ack(ssk))
- tcp_send_ack(ssk);
- unlock_sock_fast(ssk, slow);
- }
+ mptcp_for_each_subflow(msk, subflow)
+ mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow));
}
static void mptcp_subflow_cleanup_rbuf(struct sock *ssk)
@@ -1003,6 +1006,13 @@ static void mptcp_wmem_uncharge(struct sock *sk, int size)
msk->wmem_reserved += size;
}
+static void __mptcp_mem_reclaim_partial(struct sock *sk)
+{
+ lockdep_assert_held_once(&sk->sk_lock.slock);
+ __mptcp_update_wmem(sk);
+ sk_mem_reclaim_partial(sk);
+}
+
static void mptcp_mem_reclaim_partial(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1094,12 +1104,8 @@ static void __mptcp_clean_una(struct sock *sk)
msk->recovery = false;
out:
- if (cleaned) {
- if (tcp_under_memory_pressure(sk)) {
- __mptcp_update_wmem(sk);
- sk_mem_reclaim_partial(sk);
- }
- }
+ if (cleaned && tcp_under_memory_pressure(sk))
+ __mptcp_mem_reclaim_partial(sk);
if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) {
if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
@@ -1179,6 +1185,7 @@ struct mptcp_sendmsg_info {
u16 limit;
u16 sent;
unsigned int flags;
+ bool data_lock_held;
};
static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq,
@@ -1250,17 +1257,17 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp)
return false;
}
-static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk)
+static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, bool data_lock_held)
{
- return !ssk->sk_tx_skb_cache &&
- tcp_under_memory_pressure(sk);
-}
+ gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation;
-static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk)
-{
- if (unlikely(mptcp_must_reclaim_memory(sk, ssk)))
- mptcp_mem_reclaim_partial(sk);
- return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation);
+ if (unlikely(tcp_under_memory_pressure(sk))) {
+ if (data_lock_held)
+ __mptcp_mem_reclaim_partial(sk);
+ else
+ mptcp_mem_reclaim_partial(sk);
+ }
+ return __mptcp_alloc_tx_skb(sk, ssk, gfp);
}
/* note: this always recompute the csum on the whole skb, even
@@ -1284,7 +1291,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
bool zero_window_probe = false;
struct mptcp_ext *mpext = NULL;
struct sk_buff *skb, *tail;
- bool can_collapse = false;
+ bool must_collapse = false;
int size_bias = 0;
int avail_size;
size_t ret = 0;
@@ -1304,16 +1311,24 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
* SSN association set here
*/
mpext = skb_ext_find(skb, SKB_EXT_MPTCP);
- can_collapse = (info->size_goal - skb->len > 0) &&
- mptcp_skb_can_collapse_to(data_seq, skb, mpext);
- if (!can_collapse) {
+ if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) {
TCP_SKB_CB(skb)->eor = 1;
- } else {
+ goto alloc_skb;
+ }
+
+ must_collapse = (info->size_goal > skb->len) &&
+ (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags);
+ if (must_collapse) {
size_bias = skb->len;
avail_size = info->size_goal - skb->len;
}
}
+alloc_skb:
+ if (!must_collapse &&
+ !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held))
+ return 0;
+
/* Zero window and all data acked? Probe. */
avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size);
if (avail_size == 0) {
@@ -1343,7 +1358,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (skb == tail) {
TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH;
mpext->data_len += ret;
- WARN_ON_ONCE(!can_collapse);
WARN_ON_ONCE(zero_window_probe);
goto out;
}
@@ -1530,15 +1544,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
if (ssk != prev_ssk)
lock_sock(ssk);
- /* keep it simple and always provide a new skb for the
- * subflow, even if we will not use it when collapsing
- * on the pending one
- */
- if (!mptcp_alloc_tx_skb(sk, ssk)) {
- mptcp_push_release(sk, ssk, &info);
- goto out;
- }
-
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0) {
mptcp_push_release(sk, ssk, &info);
@@ -1571,7 +1576,9 @@ out:
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- struct mptcp_sendmsg_info info;
+ struct mptcp_sendmsg_info info = {
+ .data_lock_held = true,
+ };
struct mptcp_data_frag *dfrag;
struct sock *xmit_ssk;
int len, copied = 0;
@@ -1597,13 +1604,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk)
goto out;
}
- if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) {
- __mptcp_update_wmem(sk);
- sk_mem_reclaim_partial(sk);
- }
- if (!__mptcp_alloc_tx_skb(sk, ssk, GFP_ATOMIC))
- goto out;
-
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
goto out;
@@ -2409,9 +2409,6 @@ static void __mptcp_retrans(struct sock *sk)
info.sent = 0;
info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent;
while (info.sent < info.limit) {
- if (!mptcp_alloc_tx_skb(sk, ssk))
- break;
-
ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info);
if (ret <= 0)
break;
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index d7aba1c4dc48..d3e6fd1615f1 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -34,7 +34,7 @@
#define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \
OPTION_MPTCP_MPC_ACK)
#define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \
- OPTION_MPTCP_MPJ_SYNACK)
+ OPTION_MPTCP_MPJ_ACK)
/* MPTCP option subtypes */
#define MPTCPOPT_MP_CAPABLE 0
@@ -573,6 +573,7 @@ void __init mptcp_subflow_init(void);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
+void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h
index 0b6cfd3b31e0..03757e76bb6b 100644
--- a/net/ncsi/internal.h
+++ b/net/ncsi/internal.h
@@ -80,6 +80,7 @@ enum {
#define NCSI_OEM_MFR_BCM_ID 0x113d
#define NCSI_OEM_MFR_INTEL_ID 0x157
/* Intel specific OEM command */
+#define NCSI_OEM_INTEL_CMD_GMA 0x06 /* CMD ID for Get MAC */
#define NCSI_OEM_INTEL_CMD_KEEP_PHY 0x20 /* CMD ID for Keep PHY up */
/* Broadcom specific OEM Command */
#define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */
@@ -89,6 +90,7 @@ enum {
#define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */
#define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */
/* OEM Command payload lengths*/
+#define NCSI_OEM_INTEL_CMD_GMA_LEN 5
#define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7
#define NCSI_OEM_BCM_CMD_GMA_LEN 12
#define NCSI_OEM_MLX_CMD_GMA_LEN 8
@@ -99,6 +101,7 @@ enum {
/* Mac address offset in OEM response */
#define BCM_MAC_ADDR_OFFSET 28
#define MLX_MAC_ADDR_OFFSET 8
+#define INTEL_MAC_ADDR_OFFSET 1
struct ncsi_channel_version {
diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c
index 89c7742cd72e..7121ce2a47c0 100644
--- a/net/ncsi/ncsi-manage.c
+++ b/net/ncsi/ncsi-manage.c
@@ -795,13 +795,36 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca)
return ret;
}
+static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca)
+{
+ unsigned char data[NCSI_OEM_INTEL_CMD_GMA_LEN];
+ int ret = 0;
+
+ nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN;
+
+ memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN);
+ *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID);
+ data[4] = NCSI_OEM_INTEL_CMD_GMA;
+
+ nca->data = data;
+
+ ret = ncsi_xmit_cmd(nca);
+ if (ret)
+ netdev_err(nca->ndp->ndev.dev,
+ "NCSI: Failed to transmit cmd 0x%x during configure\n",
+ nca->type);
+
+ return ret;
+}
+
/* OEM Command handlers initialization */
static struct ncsi_oem_gma_handler {
unsigned int mfr_id;
int (*handler)(struct ncsi_cmd_arg *nca);
} ncsi_oem_gma_handlers[] = {
{ NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm },
- { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }
+ { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx },
+ { NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel }
};
static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id)
diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h
index 80938b338fee..ba66c7dc3a21 100644
--- a/net/ncsi/ncsi-pkt.h
+++ b/net/ncsi/ncsi-pkt.h
@@ -178,6 +178,12 @@ struct ncsi_rsp_oem_bcm_pkt {
unsigned char data[]; /* Cmd specific Data */
};
+/* Intel Response Data */
+struct ncsi_rsp_oem_intel_pkt {
+ unsigned char cmd; /* OEM Command ID */
+ unsigned char data[]; /* Cmd specific Data */
+};
+
/* Get Link Status */
struct ncsi_rsp_gls_pkt {
struct ncsi_rsp_pkt_hdr rsp; /* Response header */
diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c
index d48374894817..6447a09932f5 100644
--- a/net/ncsi/ncsi-rsp.c
+++ b/net/ncsi/ncsi-rsp.c
@@ -699,9 +699,51 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr)
return 0;
}
+/* Response handler for Intel command Get Mac Address */
+static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr)
+{
+ struct ncsi_dev_priv *ndp = nr->ndp;
+ struct net_device *ndev = ndp->ndev.dev;
+ const struct net_device_ops *ops = ndev->netdev_ops;
+ struct ncsi_rsp_oem_pkt *rsp;
+ struct sockaddr saddr;
+ int ret = 0;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+
+ saddr.sa_family = ndev->type;
+ ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN);
+ /* Increase mac address by 1 for BMC's address */
+ eth_addr_inc((u8 *)saddr.sa_data);
+ if (!is_valid_ether_addr((const u8 *)saddr.sa_data))
+ return -ENXIO;
+
+ /* Set the flag for GMA command which should only be called once */
+ ndp->gma_flag = 1;
+
+ ret = ops->ndo_set_mac_address(ndev, &saddr);
+ if (ret < 0)
+ netdev_warn(ndev,
+ "NCSI: 'Writing mac address to device failed\n");
+
+ return ret;
+}
+
/* Response handler for Intel card */
static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr)
{
+ struct ncsi_rsp_oem_intel_pkt *intel;
+ struct ncsi_rsp_oem_pkt *rsp;
+
+ /* Get the response header */
+ rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp);
+ intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data);
+
+ if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA)
+ return ncsi_rsp_handler_oem_intel_gma(nr);
+
return 0;
}
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index d31dbccbe7bd..94e18fb9690d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -21,7 +21,6 @@
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/random.h>
-#include <linux/jhash.h>
#include <linux/siphash.h>
#include <linux/err.h>
#include <linux/percpu.h>
@@ -78,6 +77,8 @@ static __read_mostly bool nf_conntrack_locks_all;
#define GC_SCAN_INTERVAL (120u * HZ)
#define GC_SCAN_MAX_DURATION msecs_to_jiffies(10)
+#define MAX_CHAINLEN 64u
+
static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
@@ -184,25 +185,31 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size);
unsigned int nf_conntrack_max __read_mostly;
EXPORT_SYMBOL_GPL(nf_conntrack_max);
seqcount_spinlock_t nf_conntrack_generation __read_mostly;
-static unsigned int nf_conntrack_hash_rnd __read_mostly;
+static siphash_key_t nf_conntrack_hash_rnd __read_mostly;
static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
const struct net *net)
{
- unsigned int n;
- u32 seed;
+ struct {
+ struct nf_conntrack_man src;
+ union nf_inet_addr dst_addr;
+ u32 net_mix;
+ u16 dport;
+ u16 proto;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, so we hash everything up to the
- * destination ports (which is a multiple of 4) and treat the last
- * three bytes manually.
- */
- seed = nf_conntrack_hash_rnd ^ net_hash_mix(net);
- n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32);
- return jhash2((u32 *)tuple, n, seed ^
- (((__force __u16)tuple->dst.u.all << 16) |
- tuple->dst.protonum));
+ memset(&combined, 0, sizeof(combined));
+
+ /* The direction must be ignored, so handle usable members manually. */
+ combined.src = tuple->src;
+ combined.dst_addr = tuple->dst.u3;
+ combined.net_mix = net_hash_mix(net);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.proto = tuple->dst.protonum;
+
+ return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
}
static u32 scale_hash(u32 hash)
@@ -835,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct hlist_nulls_node *n;
+ unsigned int chainlen = 0;
unsigned int sequence;
+ int err = -EEXIST;
zone = nf_ct_zone(ct);
@@ -849,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
} while (nf_conntrack_double_lock(net, hash, reply_hash, sequence));
/* See if there's one in the list already, including reverse */
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
+
+ chainlen = 0;
+
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
smp_wmb();
/* The caller holds a reference to this object */
@@ -867,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
NF_CT_STAT_INC(net, insert);
local_bh_enable();
return 0;
-
+chaintoolong:
+ NF_CT_STAT_INC(net, chaintoolong);
+ err = -ENOSPC;
out:
nf_conntrack_double_unlock(hash, reply_hash);
local_bh_enable();
- return -EEXIST;
+ return err;
}
EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);
@@ -1084,6 +1104,7 @@ int
__nf_conntrack_confirm(struct sk_buff *skb)
{
const struct nf_conntrack_zone *zone;
+ unsigned int chainlen = 0, sequence;
unsigned int hash, reply_hash;
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
@@ -1091,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
struct hlist_nulls_node *n;
enum ip_conntrack_info ctinfo;
struct net *net;
- unsigned int sequence;
int ret = NF_DROP;
ct = nf_ct_get(skb, &ctinfo);
@@ -1151,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb)
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
not in the hash. If there is, we lost race. */
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode)
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN)
+ goto chaintoolong;
+ }
- hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode)
+ chainlen = 0;
+ hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) {
if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
zone, net))
goto out;
+ if (chainlen++ > MAX_CHAINLEN) {
+chaintoolong:
+ nf_ct_add_to_dying_list(ct);
+ NF_CT_STAT_INC(net, chaintoolong);
+ NF_CT_STAT_INC(net, insert_failed);
+ ret = NF_DROP;
+ goto dying;
+ }
+ }
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
@@ -2594,26 +2627,24 @@ int nf_conntrack_init_start(void)
spin_lock_init(&nf_conntrack_locks[i]);
if (!nf_conntrack_htable_size) {
- /* Idea from tcp.c: use 1/16384 of memory.
- * On i386: 32MB machine has 512 buckets.
- * >= 1GB machines have 16384 buckets.
- * >= 4GB machines have 65536 buckets.
- */
nf_conntrack_htable_size
= (((nr_pages << PAGE_SHIFT) / 16384)
/ sizeof(struct hlist_head));
- if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
- nf_conntrack_htable_size = 65536;
+ if (BITS_PER_LONG >= 64 &&
+ nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE)))
+ nf_conntrack_htable_size = 262144;
else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE))
- nf_conntrack_htable_size = 16384;
- if (nf_conntrack_htable_size < 32)
- nf_conntrack_htable_size = 32;
-
- /* Use a max. factor of four by default to get the same max as
- * with the old struct list_heads. When a table size is given
- * we use the old value of 8 to avoid reducing the max.
- * entries. */
- max_factor = 4;
+ nf_conntrack_htable_size = 65536;
+
+ if (nf_conntrack_htable_size < 1024)
+ nf_conntrack_htable_size = 1024;
+ /* Use a max. factor of one by default to keep the average
+ * hash chain length at 2 entries. Each entry has to be added
+ * twice (once for original direction, once for reply).
+ * When a table size is given we use the old value of 8 to
+ * avoid implicit reduction of the max entries setting.
+ */
+ max_factor = 1;
}
nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 1e851bc2e61a..f562eeef4234 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -17,7 +17,7 @@
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/moduleparam.h>
#include <linux/export.h>
#include <net/net_namespace.h>
@@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash);
unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
-static unsigned int nf_ct_expect_hashrnd __read_mostly;
+static siphash_key_t nf_ct_expect_hashrnd __read_mostly;
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
@@ -81,15 +81,26 @@ static void nf_ct_expectation_timed_out(struct timer_list *t)
static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
- unsigned int hash, seed;
+ struct {
+ union nf_inet_addr dst_addr;
+ u32 net_mix;
+ u16 dport;
+ u8 l3num;
+ u8 protonum;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
+ u32 hash;
get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd));
- seed = nf_ct_expect_hashrnd ^ net_hash_mix(n);
+ memset(&combined, 0, sizeof(combined));
- hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
- (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
- (__force __u16)tuple->dst.u.all) ^ seed);
+ combined.dst_addr = tuple->dst.u3;
+ combined.net_mix = net_hash_mix(n);
+ combined.dport = (__force __u16)tuple->dst.u.all;
+ combined.l3num = tuple->src.l3num;
+ combined.protonum = tuple->dst.protonum;
+
+ hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd);
return reciprocal_scale(hash, nf_ct_expect_hsize);
}
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 5f9fc6b94855..f1e5443fe7c7 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2528,7 +2528,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq,
nla_put_be32(skb, CTA_STATS_SEARCH_RESTART,
htonl(st->search_restart)) ||
nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE,
- htonl(st->clash_resolve)))
+ htonl(st->clash_resolve)) ||
+ nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG,
+ htonl(st->chaintoolong)))
goto nla_put_failure;
nlmsg_end(skb, nlh);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 7e0d956da51d..80f675d884b2 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -432,7 +432,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
unsigned int nr_conntracks;
if (v == SEQ_START_TOKEN) {
- seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
@@ -447,7 +447,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
st->invalid,
0,
0,
- 0,
+ st->chaintoolong,
st->insert,
st->insert_failed,
st->drop,
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 7de595ead06a..7008961f5cb0 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -13,7 +13,7 @@
#include <linux/skbuff.h>
#include <linux/gfp.h>
#include <net/xfrm.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/rtnetlink.h>
#include <net/netfilter/nf_conntrack.h>
@@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly;
static struct hlist_head *nf_nat_bysource __read_mostly;
static unsigned int nf_nat_htable_size __read_mostly;
-static unsigned int nf_nat_hash_rnd __read_mostly;
+static siphash_key_t nf_nat_hash_rnd __read_mostly;
struct nf_nat_lookup_hook_priv {
struct nf_hook_entries __rcu *entries;
@@ -153,12 +153,22 @@ static unsigned int
hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
unsigned int hash;
+ struct {
+ struct nf_conntrack_man src;
+ u32 net_mix;
+ u32 protonum;
+ } __aligned(SIPHASH_ALIGNMENT) combined;
get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd));
+ memset(&combined, 0, sizeof(combined));
+
/* Original src, to ensure we map it consistently if poss. */
- hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32),
- tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n));
+ combined.src = tuple->src;
+ combined.net_mix = net_hash_mix(n);
+ combined.protonum = tuple->dst.protonum;
+
+ hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd);
return reciprocal_scale(hash, nf_nat_htable_size);
}
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 337e22d8b40b..99b1de14ff7e 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -41,6 +41,7 @@ struct nft_ct_helper_obj {
#ifdef CONFIG_NF_CONNTRACK_ZONES
static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template);
static unsigned int nft_ct_pcpu_template_refcnt __read_mostly;
+static DEFINE_MUTEX(nft_ct_pcpu_mutex);
#endif
static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c,
@@ -525,8 +526,10 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv)
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
case NFT_CT_ZONE:
+ mutex_lock(&nft_ct_pcpu_mutex);
if (--nft_ct_pcpu_template_refcnt == 0)
nft_ct_tmpl_put_pcpu();
+ mutex_unlock(&nft_ct_pcpu_mutex);
break;
#endif
default:
@@ -564,9 +567,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx,
#endif
#ifdef CONFIG_NF_CONNTRACK_ZONES
case NFT_CT_ZONE:
- if (!nft_ct_tmpl_alloc_pcpu())
+ mutex_lock(&nft_ct_pcpu_mutex);
+ if (!nft_ct_tmpl_alloc_pcpu()) {
+ mutex_unlock(&nft_ct_pcpu_mutex);
return -ENOMEM;
+ }
nft_ct_pcpu_template_refcnt++;
+ mutex_unlock(&nft_ct_pcpu_mutex);
len = sizeof(u16);
break;
#endif
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 543365f58e97..2a2bc64f75cf 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -46,6 +46,8 @@
* Copyright (C) 2011, <lokec@ccs.neu.edu>
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
#include <linux/ethtool.h>
#include <linux/types.h>
#include <linux/mm.h>
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 525e3ea063b1..ec2322529727 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
goto err;
}
- if (!size || size & 3 || len != size + hdrlen)
+ if (!size || len != ALIGN(size, 4) + hdrlen)
goto err;
if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA &&
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index c4afdd026f51..bb0cd6d3d2c2 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -369,6 +369,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
{
struct fq_codel_sched_data *q = qdisc_priv(sch);
struct nlattr *tb[TCA_FQ_CODEL_MAX + 1];
+ u32 quantum = 0;
int err;
if (!opt)
@@ -386,6 +387,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
q->flows_cnt > 65536)
return -EINVAL;
}
+ if (tb[TCA_FQ_CODEL_QUANTUM]) {
+ quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum > FQ_CODEL_QUANTUM_MAX) {
+ NL_SET_ERR_MSG(extack, "Invalid quantum");
+ return -EINVAL;
+ }
+ }
sch_tree_lock(sch);
if (tb[TCA_FQ_CODEL_TARGET]) {
@@ -412,8 +420,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt,
if (tb[TCA_FQ_CODEL_ECN])
q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]);
- if (tb[TCA_FQ_CODEL_QUANTUM])
- q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM]));
+ if (quantum)
+ q->quantum = quantum;
if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])
q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]));
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index e286dafd6e88..6ec1ebe878ae 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -230,7 +230,8 @@ static int smc_clc_prfx_set(struct socket *clcsock,
goto out_rel;
}
/* get address to which the internal TCP socket is bound */
- kernel_getsockname(clcsock, (struct sockaddr *)&addrs);
+ if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0)
+ goto out_rel;
/* analyze IP specific data of net_device belonging to TCP socket */
addr6 = (struct sockaddr_in6 *)&addrs;
rcu_read_lock();
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index af227b65669e..8280c938be80 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1474,7 +1474,9 @@ static void smc_conn_abort_work(struct work_struct *work)
abort_work);
struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+ lock_sock(&smc->sk);
smc_conn_kill(conn, true);
+ release_sock(&smc->sk);
sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index d1c003a25b0f..61c276bddaf2 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -160,7 +160,7 @@ static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
mutex_lock(&sn->gssp_lock);
clnt = sn->gssp_clnt;
if (clnt)
- atomic_inc(&clnt->cl_count);
+ refcount_inc(&clnt->cl_count);
mutex_unlock(&sn->gssp_lock);
return clnt;
}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 3d685fe328fa..3e776e3dff91 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -194,6 +194,8 @@ static void rsi_request(struct cache_detail *cd,
qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
(*bpp)[-1] = '\n';
+ WARN_ONCE(*blen < 0,
+ "RPCSEC/GSS credential too large - please use gssproxy\n");
}
static int rsi_parse(struct cache_detail *cd,
@@ -707,11 +709,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
/*
* Verify the checksum on the header and return SVC_OK on success.
* Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in authp.
+ * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
*/
static int
gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
- __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp)
+ __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
{
struct gss_ctx *ctx_id = rsci->mechctx;
struct xdr_buf rpchdr;
@@ -725,7 +727,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
xdr_buf_from_iov(&iov, &rpchdr);
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
if (argv->iov_len < 4)
return SVC_DENIED;
flavor = svc_getnl(argv);
@@ -737,13 +739,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
if (rqstp->rq_deferred) /* skip verification of revisited request */
return SVC_OK;
if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
- *authp = rpcsec_gsserr_credproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
return SVC_DENIED;
}
if (gc->gc_seq > MAXSEQ) {
trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq);
- *authp = rpcsec_gsserr_ctxproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
return SVC_DENIED;
}
if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq))
@@ -1038,6 +1040,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
struct rpc_gss_wire_cred *gc = &svcdata->clcred;
int stat;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+
/*
* A gss export can be specified either by:
* export *(sec=krb5,rw)
@@ -1053,6 +1057,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
stat = svcauth_unix_set_client(rqstp);
if (stat == SVC_DROP || stat == SVC_CLOSE)
return stat;
+
+ rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
@@ -1142,7 +1148,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token)
}
static int gss_read_proxy_verf(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp,
+ struct rpc_gss_wire_cred *gc,
struct xdr_netobj *in_handle,
struct gssp_in_token *in_token)
{
@@ -1151,7 +1157,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
int pages, i, res, pgto, pgfrom;
size_t inlen, to_offs, from_offs;
- res = gss_read_common_verf(gc, argv, authp, in_handle);
+ res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
if (res)
return res;
@@ -1227,7 +1233,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
* Otherwise, drop the request pending an answer to the upcall.
*/
static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp)
+ struct rpc_gss_wire_cred *gc)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1236,7 +1242,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
- ret = gss_read_verf(gc, argv, authp,
+ ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
&rsikey.in_handle, &rsikey.in_token);
if (ret)
return ret;
@@ -1339,7 +1345,7 @@ out:
}
static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc, __be32 *authp)
+ struct rpc_gss_wire_cred *gc)
{
struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj cli_handle;
@@ -1351,8 +1357,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
memset(&ud, 0, sizeof(ud));
- ret = gss_read_proxy_verf(rqstp, gc, authp,
- &ud.in_handle, &ud.in_token);
+ ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token);
if (ret)
return ret;
@@ -1525,7 +1530,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
* response here and return SVC_COMPLETE.
*/
static int
-svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_gss_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -1538,7 +1543,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
if (!svcdata)
svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
if (!svcdata)
@@ -1575,22 +1580,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
goto auth_err;
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
if (use_gss_proxy(SVC_NET(rqstp)))
- return svcauth_gss_proxy_init(rqstp, gc, authp);
+ return svcauth_gss_proxy_init(rqstp, gc);
else
- return svcauth_gss_legacy_init(rqstp, gc, authp);
+ return svcauth_gss_legacy_init(rqstp, gc);
case RPC_GSS_PROC_DATA:
case RPC_GSS_PROC_DESTROY:
/* Look up the context, and check the verifier: */
- *authp = rpcsec_gsserr_credproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
if (!rsci)
goto auth_err;
- switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
+ switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
case SVC_OK:
break;
case SVC_DENIED:
@@ -1600,7 +1605,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
}
break;
default:
- *authp = rpc_autherr_rejectedcred;
+ rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
goto auth_err;
}
@@ -1616,13 +1621,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
svc_putnl(resv, RPC_SUCCESS);
goto complete;
case RPC_GSS_PROC_DATA:
- *authp = rpcsec_gsserr_ctxproblem;
+ rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
svcdata->verf_start = resv->iov_base + resv->iov_len;
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
rqstp->rq_cred = rsci->cred;
get_group_info(rsci->cred.cr_group_info);
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
switch (gc->gc_svc) {
case RPC_GSS_SVC_NONE:
break;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 1a2c1c44bb00..59641803472c 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -803,7 +803,7 @@ static int cache_request(struct cache_detail *detail,
detail->cache_request(detail, crq->item, &bp, &len);
if (len < 0)
- return -EAGAIN;
+ return -E2BIG;
return PAGE_SIZE - len;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8b4de70e8ead..f056ff931444 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -167,7 +167,7 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event)
case RPC_PIPEFS_MOUNT:
if (clnt->cl_pipedir_objects.pdh_dentry != NULL)
return 1;
- if (atomic_read(&clnt->cl_count) == 0)
+ if (refcount_read(&clnt->cl_count) == 0)
return 1;
break;
case RPC_PIPEFS_UMOUNT:
@@ -419,7 +419,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
clnt->cl_rtt = &clnt->cl_rtt_default;
rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval);
- atomic_set(&clnt->cl_count, 1);
+ refcount_set(&clnt->cl_count, 1);
if (nodename == NULL)
nodename = utsname()->nodename;
@@ -431,7 +431,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args,
if (err)
goto out_no_path;
if (parent)
- atomic_inc(&parent->cl_count);
+ refcount_inc(&parent->cl_count);
trace_rpc_clnt_new(clnt, xprt, program->name, args->servername);
return clnt;
@@ -918,18 +918,16 @@ rpc_free_client(struct rpc_clnt *clnt)
static struct rpc_clnt *
rpc_free_auth(struct rpc_clnt *clnt)
{
- if (clnt->cl_auth == NULL)
- return rpc_free_client(clnt);
-
/*
* Note: RPCSEC_GSS may need to send NULL RPC calls in order to
* release remaining GSS contexts. This mechanism ensures
* that it can do so safely.
*/
- atomic_inc(&clnt->cl_count);
- rpcauth_release(clnt->cl_auth);
- clnt->cl_auth = NULL;
- if (atomic_dec_and_test(&clnt->cl_count))
+ if (clnt->cl_auth != NULL) {
+ rpcauth_release(clnt->cl_auth);
+ clnt->cl_auth = NULL;
+ }
+ if (refcount_dec_and_test(&clnt->cl_count))
return rpc_free_client(clnt);
return NULL;
}
@@ -943,7 +941,7 @@ rpc_release_client(struct rpc_clnt *clnt)
do {
if (list_empty(&clnt->cl_tasks))
wake_up(&destroy_wait);
- if (!atomic_dec_and_test(&clnt->cl_count))
+ if (refcount_dec_not_one(&clnt->cl_count))
break;
clnt = rpc_free_auth(clnt);
} while (clnt != NULL);
@@ -1082,7 +1080,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
if (clnt != NULL) {
rpc_task_set_transport(task, clnt);
task->tk_client = clnt;
- atomic_inc(&clnt->cl_count);
+ refcount_inc(&clnt->cl_count);
if (clnt->cl_softrtry)
task->tk_flags |= RPC_TASK_SOFT;
if (clnt->cl_softerr)
@@ -2694,17 +2692,18 @@ static const struct rpc_procinfo rpcproc_null = {
.p_decode = rpcproc_decode_null,
};
-static int rpc_ping(struct rpc_clnt *clnt)
+static void
+rpc_null_call_prepare(struct rpc_task *task, void *data)
{
- struct rpc_message msg = {
- .rpc_proc = &rpcproc_null,
- };
- int err;
- err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
- RPC_TASK_NULLCREDS);
- return err;
+ task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT;
+ rpc_call_start(task);
}
+static const struct rpc_call_ops rpc_null_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
+ .rpc_call_done = rpc_default_callback,
+};
+
static
struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
struct rpc_xprt *xprt, struct rpc_cred *cred, int flags,
@@ -2718,7 +2717,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt,
.rpc_xprt = xprt,
.rpc_message = &msg,
.rpc_op_cred = cred,
- .callback_ops = (ops != NULL) ? ops : &rpc_default_ops,
+ .callback_ops = ops ?: &rpc_null_ops,
.callback_data = data,
.flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN |
RPC_TASK_NULLCREDS,
@@ -2733,6 +2732,19 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int
}
EXPORT_SYMBOL_GPL(rpc_call_null);
+static int rpc_ping(struct rpc_clnt *clnt)
+{
+ struct rpc_task *task;
+ int status;
+
+ task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = task->tk_status;
+ rpc_put_task(task);
+ return status;
+}
+
struct rpc_cb_add_xprt_calldata {
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
@@ -2756,6 +2768,7 @@ static void rpc_cb_add_xprt_release(void *calldata)
}
static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = {
+ .rpc_call_prepare = rpc_null_call_prepare,
.rpc_call_done = rpc_cb_add_xprt_done,
.rpc_release = rpc_cb_add_xprt_release,
};
@@ -2774,6 +2787,15 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
struct rpc_cb_add_xprt_calldata *data;
struct rpc_task *task;
+ if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) {
+ rcu_read_lock();
+ pr_warn("SUNRPC: reached max allowed number (%d) did not add "
+ "transport to server: %s\n", clnt->cl_max_connect,
+ rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR));
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
data = kmalloc(sizeof(*data), GFP_NOFS);
if (!data)
return -ENOMEM;
@@ -2786,7 +2808,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt,
task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC,
&rpc_cb_add_xprt_call_ops, data);
-
+ data->xps->xps_nunique_destaddr_xprts++;
rpc_put_task(task);
success:
return 1;
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index 827bf3a28178..7dc9cc929bfd 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -90,7 +90,7 @@ static int tasks_open(struct inode *inode, struct file *filp)
struct seq_file *seq = filp->private_data;
struct rpc_clnt *clnt = seq->private = inode->i_private;
- if (!atomic_inc_not_zero(&clnt->cl_count)) {
+ if (!refcount_inc_not_zero(&clnt->cl_count)) {
seq_release(inode, filp);
ret = -EINVAL;
}
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 09c000d490a1..ee5336d73fdd 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -423,7 +423,7 @@ rpc_info_open(struct inode *inode, struct file *file)
spin_lock(&file->f_path.dentry->d_lock);
if (!d_unhashed(file->f_path.dentry))
clnt = RPC_I(inode)->private;
- if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) {
+ if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) {
spin_unlock(&file->f_path.dentry->d_lock);
m->private = clnt;
} else {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index bfcbaf7b3822..a3bbe5ce4570 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1186,22 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...)
static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {}
#endif
-__be32
-svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err)
-{
- set_bit(RQ_AUTHERR, &rqstp->rq_flags);
- return auth_err;
-}
-EXPORT_SYMBOL_GPL(svc_return_autherr);
-
-static __be32
-svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp)
-{
- if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags))
- return *statp;
- return rpc_auth_ok;
-}
-
static int
svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
{
@@ -1225,7 +1209,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp)
test_bit(RQ_DROPME, &rqstp->rq_flags))
return 0;
- if (test_bit(RQ_AUTHERR, &rqstp->rq_flags))
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
return 1;
if (*statp != rpc_success)
@@ -1306,7 +1290,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
struct svc_process_info process;
__be32 *statp;
u32 prog, vers;
- __be32 auth_stat, rpc_stat;
+ __be32 rpc_stat;
int auth_res;
__be32 *reply_statp;
@@ -1349,14 +1333,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* We do this before anything else in order to get a decent
* auth verifier.
*/
- auth_res = svc_authenticate(rqstp, &auth_stat);
+ auth_res = svc_authenticate(rqstp);
/* Also give the program a chance to reject this call: */
- if (auth_res == SVC_OK && progp) {
- auth_stat = rpc_autherr_badcred;
+ if (auth_res == SVC_OK && progp)
auth_res = progp->pg_authenticate(rqstp);
- }
if (auth_res != SVC_OK)
- trace_svc_authenticate(rqstp, auth_res, auth_stat);
+ trace_svc_authenticate(rqstp, auth_res);
switch (auth_res) {
case SVC_OK:
break;
@@ -1415,15 +1397,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
goto release_dropit;
if (*statp == rpc_garbage_args)
goto err_garbage;
- auth_stat = svc_get_autherr(rqstp, statp);
- if (auth_stat != rpc_auth_ok)
- goto err_release_bad_auth;
} else {
dprintk("svc: calling dispatcher\n");
if (!process.dispatch(rqstp, statp))
goto release_dropit; /* Release reply info */
}
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
+ goto err_release_bad_auth;
+
/* Check RPC status result */
if (*statp != rpc_success)
resv->iov_len = ((void*)statp) - resv->iov_base + 4;
@@ -1473,13 +1455,14 @@ err_release_bad_auth:
if (procp->pc_release)
procp->pc_release(rqstp);
err_bad_auth:
- dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat));
+ dprintk("svc: authentication failed (%d)\n",
+ be32_to_cpu(rqstp->rq_auth_stat));
serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of accept status: */
xdr_ressize_check(rqstp, reply_statp);
svc_putnl(resv, 1); /* REJECT */
svc_putnl(resv, 1); /* AUTH_ERROR */
- svc_putnl(resv, ntohl(auth_stat)); /* status */
+ svc_putu32(resv, rqstp->rq_auth_stat); /* status */
goto sendit;
err_bad_prog:
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e1153cba9cc6..6316bd2b8f37 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -663,7 +663,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
{
struct svc_serv *serv = rqstp->rq_server;
struct xdr_buf *arg = &rqstp->rq_arg;
- unsigned long pages, filled;
+ unsigned long pages, filled, ret;
pagevec_init(&rqstp->rq_pvec);
@@ -675,11 +675,12 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
pages = RPCSVC_MAXPAGES;
}
- for (;;) {
- filled = alloc_pages_bulk_array(GFP_KERNEL, pages,
- rqstp->rq_pages);
- if (filled == pages)
- break;
+ for (filled = 0; filled < pages; filled = ret) {
+ ret = alloc_pages_bulk_array(GFP_KERNEL, pages,
+ rqstp->rq_pages);
+ if (ret > filled)
+ /* Made progress, don't sleep yet */
+ continue;
set_current_state(TASK_INTERRUPTIBLE);
if (signalled() || kthread_should_stop()) {
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 998b196b6176..5a8b8e03fdd4 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops)
}
int
-svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
+svc_authenticate(struct svc_rqst *rqstp)
{
rpc_authflavor_t flavor;
struct auth_ops *aops;
- *authp = rpc_auth_ok;
+ rqstp->rq_auth_stat = rpc_auth_ok;
flavor = svc_getnl(&rqstp->rq_arg.head[0]);
@@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
aops = svc_get_auth_ops(flavor);
if (aops == NULL) {
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
@@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
init_svc_cred(&rqstp->rq_cred);
rqstp->rq_authop = aops;
- return aops->accept(rqstp, authp);
+ return aops->accept(rqstp);
}
EXPORT_SYMBOL_GPL(svc_authenticate);
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 35b7966ac3b3..d7ed7d49115a 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -681,8 +681,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
rqstp->rq_client = NULL;
if (rqstp->rq_proc == 0)
- return SVC_OK;
+ goto out;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
ipm = ip_map_cached_get(xprt);
if (ipm == NULL)
ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class,
@@ -719,13 +720,16 @@ svcauth_unix_set_client(struct svc_rqst *rqstp)
put_group_info(cred->cr_group_info);
cred->cr_group_info = gi;
}
+
+out:
+ rqstp->rq_auth_stat = rpc_auth_ok;
return SVC_OK;
}
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
static int
-svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_null_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -736,12 +740,12 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
if (svc_getu32(argv) != 0) {
dprintk("svc: bad null cred\n");
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
dprintk("svc: bad null verf\n");
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -785,7 +789,7 @@ struct auth_ops svcauth_null = {
static int
-svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
+svcauth_unix_accept(struct svc_rqst *rqstp)
{
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
@@ -827,7 +831,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
}
groups_sort(cred->cr_group_info);
if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
- *authp = rpc_autherr_badverf;
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -839,7 +843,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
return SVC_OK;
badcred:
- *authp = rpc_autherr_badcred;
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 64da3bfd28e6..9a6f17e18f73 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -100,6 +100,28 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj,
return ret + 1;
}
+static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+ struct rpc_xprt *xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj);
+ struct sockaddr_storage saddr;
+ struct sock_xprt *sock;
+ ssize_t ret = -1;
+
+ if (!xprt)
+ return 0;
+
+ sock = container_of(xprt, struct sock_xprt, xprt);
+ if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0)
+ goto out;
+
+ ret = sprintf(buf, "%pISc\n", &saddr);
+out:
+ xprt_put(xprt);
+ return ret + 1;
+}
+
static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
struct kobj_attribute *attr,
char *buf)
@@ -114,14 +136,16 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj,
"max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n"
"binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n"
"backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n"
- "tasks_queuelen=%ld\n",
+ "tasks_queuelen=%ld\ndst_port=%s\n",
xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs,
xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen,
xprt->sending.qlen, xprt->pending.qlen,
xprt->backlog.qlen, xprt->main,
(xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
get_srcport(xprt) : 0,
- atomic_long_read(&xprt->queuelen));
+ atomic_long_read(&xprt->queuelen),
+ (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ?
+ xprt->address_strings[RPC_DISPLAY_PORT] : "0");
xprt_put(xprt);
return ret + 1;
}
@@ -183,8 +207,10 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj,
if (!xprt_switch)
return 0;
- ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\nqueue_len=%ld\n",
+ ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\n"
+ "num_unique_destaddr=%u\nqueue_len=%ld\n",
xprt_switch->xps_nxprts, xprt_switch->xps_nactive,
+ xprt_switch->xps_nunique_destaddr_xprts,
atomic_long_read(&xprt_switch->xps_queuelen));
xprt_switch_put(xprt_switch);
return ret + 1;
@@ -376,6 +402,9 @@ static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj)
static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr,
0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store);
+static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr,
+ 0644, rpc_sysfs_xprt_srcaddr_show, NULL);
+
static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info,
0444, rpc_sysfs_xprt_info_show, NULL);
@@ -384,6 +413,7 @@ static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state,
static struct attribute *rpc_sysfs_xprt_attrs[] = {
&rpc_sysfs_xprt_dstaddr.attr,
+ &rpc_sysfs_xprt_srcaddr.attr,
&rpc_sysfs_xprt_info.attr,
&rpc_sysfs_xprt_change_state.attr,
NULL,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 05abe344a269..cfd681700d1a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -762,6 +762,20 @@ void xprt_disconnect_done(struct rpc_xprt *xprt)
EXPORT_SYMBOL_GPL(xprt_disconnect_done);
/**
+ * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call
+ * @xprt: transport to disconnect
+ */
+static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt)
+{
+ set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state))
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ xprt->snd_task, -ENOTCONN);
+}
+
+/**
* xprt_force_disconnect - force a transport to disconnect
* @xprt: transport to disconnect
*
@@ -772,13 +786,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
/* Don't race with the test_bit() in xprt_clear_locked() */
spin_lock(&xprt->transport_lock);
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- else if (xprt->snd_task)
- rpc_wake_up_queued_task_set_status(&xprt->pending,
- xprt->snd_task, -ENOTCONN);
+ xprt_schedule_autoclose_locked(xprt);
spin_unlock(&xprt->transport_lock);
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
@@ -818,11 +826,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie)
goto out;
if (test_bit(XPRT_CLOSING, &xprt->state))
goto out;
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- /* Try to schedule an autoclose RPC call */
- if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
- xprt_wake_pending_tasks(xprt, -EAGAIN);
+ xprt_schedule_autoclose_locked(xprt);
out:
spin_unlock(&xprt->transport_lock);
}
@@ -880,12 +884,14 @@ bool xprt_lock_connect(struct rpc_xprt *xprt,
goto out;
if (xprt->snd_task != task)
goto out;
+ set_bit(XPRT_SND_IS_COOKIE, &xprt->state);
xprt->snd_task = cookie;
ret = true;
out:
spin_unlock(&xprt->transport_lock);
return ret;
}
+EXPORT_SYMBOL_GPL(xprt_lock_connect);
void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
{
@@ -895,12 +901,14 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie)
if (!test_bit(XPRT_LOCKED, &xprt->state))
goto out;
xprt->snd_task =NULL;
+ clear_bit(XPRT_SND_IS_COOKIE, &xprt->state);
xprt->ops->release_xprt(xprt, NULL);
xprt_schedule_autodisconnect(xprt);
out:
spin_unlock(&xprt->transport_lock);
wake_up_bit(&xprt->state, XPRT_LOCKED);
}
+EXPORT_SYMBOL_GPL(xprt_unlock_connect);
/**
* xprt_connect - schedule a transport connect operation
diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c
index c60820e45082..1693f81aae37 100644
--- a/net/sunrpc/xprtmultipath.c
+++ b/net/sunrpc/xprtmultipath.c
@@ -139,6 +139,7 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt,
xps->xps_iter_ops = &rpc_xprt_iter_singular;
rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags);
xprt_switch_add_xprt_locked(xps, xprt);
+ xps->xps_nunique_destaddr_xprts = 1;
rpc_sysfs_xprt_setup(xps, xprt, gfp_flags);
}
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 1151efd09b27..17f174d6ea3b 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst)
if (rc < 0)
goto failed_marshal;
- if (rpcrdma_post_sends(r_xprt, req))
+ if (frwr_send(r_xprt, req))
goto drop_connection;
return 0;
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 229fcc9a9064..f700b34a5bfd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -394,6 +394,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rpcrdma_mr *mr;
unsigned int num_wrs;
+ int ret;
num_wrs = 1;
post_wr = send_wr;
@@ -420,7 +421,10 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
}
trace_xprtrdma_post_send(req);
- return ib_post_send(ep->re_id->qp, post_wr, NULL);
+ ret = ib_post_send(ep->re_id->qp, post_wr, NULL);
+ if (ret)
+ trace_xprtrdma_post_send_err(r_xprt, req, ret);
+ return ret;
}
/**
@@ -557,6 +561,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
/* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
+
+ /* Force a connection loss to ensure complete recovery.
+ */
+ rpcrdma_force_disconnect(ep);
}
/**
@@ -653,4 +661,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* retransmission.
*/
rpcrdma_unpin_rqst(req->rl_reply);
+
+ /* Force a connection loss to ensure complete recovery.
+ */
+ rpcrdma_force_disconnect(ep);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 9c2ffc67c0fd..16e5696314a4 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work)
xprt->stat.connect_start;
xprt_set_connected(xprt);
rc = -EAGAIN;
- } else {
- /* Force a call to xprt_rdma_close to clean up */
- spin_lock(&xprt->transport_lock);
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
- spin_unlock(&xprt->transport_lock);
- }
+ } else
+ rpcrdma_xprt_disconnect(r_xprt);
+ xprt_unlock_connect(xprt, r_xprt);
xprt_wake_pending_tasks(xprt, rc);
}
@@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
unsigned long delay;
+ WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt));
+
delay = 0;
if (ep && ep->re_connect_status != 0) {
delay = xprt_reconnect_delay(xprt);
@@ -661,7 +660,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst)
goto drop_connection;
rqst->rq_xtime = ktime_get();
- if (rpcrdma_post_sends(r_xprt, req))
+ if (frwr_send(r_xprt, req))
goto drop_connection;
rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 649c23518ec0..aaec3c9be8db 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -124,7 +124,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
*/
-static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
{
if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
xprt_force_disconnect(ep->re_xprt);
@@ -1350,21 +1350,6 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
}
/**
- * rpcrdma_post_sends - Post WRs to a transport's Send Queue
- * @r_xprt: controlling transport instance
- * @req: rpcrdma_req containing the Send WR to post
- *
- * Returns 0 if the post was successful, otherwise -ENOTCONN
- * is returned.
- */
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
-{
- if (frwr_send(r_xprt, req))
- return -ENOTCONN;
- return 0;
-}
-
-/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
* @needed: current credit grant
@@ -1416,12 +1401,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
- if (atomic_dec_return(&ep->re_receiving) > 0)
- complete(&ep->re_done);
-
-out:
- trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
+ trace_xprtrdma_post_recvs_err(r_xprt, rc);
for (wr = bad_wr; wr;) {
struct rpcrdma_rep *rep;
@@ -1431,6 +1412,11 @@ out:
--count;
}
}
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
+out:
+ trace_xprtrdma_post_recvs(r_xprt, count);
ep->re_receive_count += count;
return;
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5d231d94e944..d91f54eae00b 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -454,11 +454,11 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Endpoint calls - xprtrdma/verbs.c
*/
+void rpcrdma_force_disconnect(struct rpcrdma_ep *ep);
void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
-int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index b7dbdcbdeb6c..04f1b78bcbca 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1656,7 +1656,7 @@ static int xs_get_srcport(struct sock_xprt *transport)
unsigned short get_srcport(struct rpc_xprt *xprt)
{
struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt);
- return sock->srcport;
+ return xs_sock_getport(sock->sock);
}
EXPORT_SYMBOL(get_srcport);
@@ -2099,13 +2099,20 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
if (sock == NULL)
return;
+ if (!xprt->reuseport) {
+ xs_close(xprt);
+ return;
+ }
switch (skst) {
- default:
+ case TCP_FIN_WAIT1:
+ case TCP_FIN_WAIT2:
+ break;
+ case TCP_ESTABLISHED:
+ case TCP_CLOSE_WAIT:
kernel_sock_shutdown(sock, SHUT_RDWR);
trace_rpc_socket_shutdown(xprt, sock);
break;
- case TCP_CLOSE:
- case TCP_TIME_WAIT:
+ default:
xs_reset_transport(transport);
}
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e3105ba407c7..ad570c2450be 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1426,7 +1426,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen)
if (ua) {
if (!tipc_uaddr_valid(ua, m->msg_namelen))
return -EINVAL;
- atype = ua->addrtype;
+ atype = ua->addrtype;
}
/* If socket belongs to a communication group follow other paths */
@@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk,
u32 dport, struct sk_buff_head *xmitq)
{
- unsigned long time_limit = jiffies + 2;
+ unsigned long time_limit = jiffies + usecs_to_jiffies(20000);
struct sk_buff *skb;
unsigned int lim;
atomic_t *dcnt;
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index eb47b9de2380..92345c9bb60c 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -3073,7 +3073,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
other = unix_peer(sk);
if (other && unix_peer(other) != sk &&
- unix_recvq_full(other) &&
+ unix_recvq_full_lockless(other) &&
unix_dgram_peer_wake_me(sk, other))
writable = 0;
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 3e02cc3b24f8..e2c0cfb334d2 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -2014,7 +2014,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
{
const struct vsock_transport *transport;
struct vsock_sock *vsk;
- ssize_t record_len;
+ ssize_t msg_len;
long timeout;
int err = 0;
DEFINE_WAIT(wait);
@@ -2028,9 +2028,9 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
if (err <= 0)
goto out;
- record_len = transport->seqpacket_dequeue(vsk, msg, flags);
+ msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
- if (record_len < 0) {
+ if (msg_len < 0) {
err = -ENOMEM;
goto out;
}
@@ -2044,14 +2044,14 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg,
* packet.
*/
if (flags & MSG_TRUNC)
- err = record_len;
+ err = msg_len;
else
err = len - msg_data_left(msg);
/* Always set MSG_TRUNC if real length of packet is
* bigger than user's buffer.
*/
- if (record_len > len)
+ if (msg_len > len)
msg->msg_flags |= MSG_TRUNC;
}
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 081e7ae93cb1..59ee1be5a6dd 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -76,8 +76,12 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
goto out;
if (msg_data_left(info->msg) == 0 &&
- info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+ if (info->msg->msg_flags & MSG_EOR)
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ }
}
trace_virtio_transport_alloc_pkt(src_cid, src_port,
@@ -457,9 +461,12 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
dequeued_len += pkt_len;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
msg_ready = true;
vvs->msg_count--;
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
}
virtio_transport_dec_rx_pkt(vvs, pkt);
@@ -1029,7 +1036,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
goto out;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
/* Try to copy small packets into the buffer of last packet queued,
@@ -1044,12 +1051,12 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
/* If there is space in the last packet queued, we copy the
* new packet in its buffer. We avoid this if the last packet
- * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is
- * delimiter of SEQPACKET record, so 'pkt' is the first packet
- * of a new record.
+ * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
+ * delimiter of SEQPACKET message, so 'pkt' is the first packet
+ * of a new message.
*/
if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
- !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
+ !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
pkt->len);
last_pkt->len += pkt->len;