Diffstat (limited to 'net')
76 files changed, 753 insertions, 416 deletions
diff --git a/net/9p/client.c b/net/9p/client.c index b7b958f61faf..213f12ed76cd 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -30,6 +30,8 @@ #define CREATE_TRACE_POINTS #include <trace/events/9p.h> +#define DEFAULT_MSIZE (128 * 1024) + /* * Client Option Parsing (code inspired by NFS code) * - a little lazy - parse all client options @@ -65,7 +67,7 @@ EXPORT_SYMBOL(p9_is_proto_dotu); int p9_show_client_options(struct seq_file *m, struct p9_client *clnt) { - if (clnt->msize != 8192) + if (clnt->msize != DEFAULT_MSIZE) seq_printf(m, ",msize=%u", clnt->msize); seq_printf(m, ",trans=%s", clnt->trans_mod->name); @@ -139,7 +141,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) int ret = 0; clnt->proto_version = p9_proto_2000L; - clnt->msize = 8192; + clnt->msize = DEFAULT_MSIZE; if (!opts) return 0; diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index f4dd0456beaf..007bbcc68010 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -34,7 +34,7 @@ #include <linux/syscalls.h> /* killme */ #define P9_PORT 564 -#define MAX_SOCK_BUF (64*1024) +#define MAX_SOCK_BUF (1024*1024) #define MAXPOLLWADDR 2 static struct p9_trans_module p9_tcp_trans; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 2bbd7dce0f1d..490a4c900339 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -610,7 +610,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->vc_wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); if (!chan->vc_wq) { err = -ENOMEM; - goto out_free_tag; + goto out_remove_file; } init_waitqueue_head(chan->vc_wq); chan->ring_bufs_avail = 1; @@ -628,6 +628,8 @@ static int p9_virtio_probe(struct virtio_device *vdev) return 0; +out_remove_file: + sysfs_remove_file(&vdev->dev.kobj, &dev_attr_mount_tag.attr); out_free_tag: kfree(tag); out_free_vq: diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index f4fea28e05da..3ec1a51a6944 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -138,7 +138,7 @@ static bool p9_xen_write_todo(struct xen_9pfs_dataring *ring, RING_IDX size) static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) { - struct xen_9pfs_front_priv *priv = NULL; + struct xen_9pfs_front_priv *priv; RING_IDX cons, prod, masked_cons, masked_prod; unsigned long flags; u32 size = p9_req->tc.size; @@ -151,7 +151,7 @@ static int p9_xen_request(struct p9_client *client, struct p9_req_t *p9_req) break; } read_unlock(&xen_9pfs_lock); - if (!priv || priv->client != client) + if (list_entry_is_head(priv, &xen_9pfs_devs, list)) return -EINVAL; num = p9_req->tc.tag % priv->num_rings; diff --git a/net/batman-adv/log.c b/net/batman-adv/log.c index f0e5d1429662..7a93a1e94c40 100644 --- a/net/batman-adv/log.c +++ b/net/batman-adv/log.c @@ -7,7 +7,7 @@ #include "log.h" #include "main.h" -#include <stdarg.h> +#include <linux/stdarg.h> #include "trace.h" diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 9231617a16e4..3523c8c7068f 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -4255,7 +4255,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, bool del = false; brmctx = br_multicast_port_ctx_get_global(pmctx); - spin_lock(&brmctx->br->multicast_lock); + spin_lock_bh(&brmctx->br->multicast_lock); if (pmctx->multicast_router == val) { /* Refresh the temp router port timer */ if (pmctx->multicast_router == MDB_RTR_TYPE_TEMP) { @@ -4305,7 +4305,7 @@ int br_multicast_set_port_router(struct net_bridge_mcast_port *pmctx, } err = 0; unlock: - 
spin_unlock(&brmctx->br->multicast_lock); + spin_unlock_bh(&brmctx->br->multicast_lock); return err; } diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c index 37b67194c0df..414dc5671c45 100644 --- a/net/caif/chnl_net.c +++ b/net/caif/chnl_net.c @@ -53,20 +53,6 @@ struct chnl_net { enum caif_states state; }; -static void robust_list_del(struct list_head *delete_node) -{ - struct list_head *list_node; - struct list_head *n; - ASSERT_RTNL(); - list_for_each_safe(list_node, n, &chnl_net_list) { - if (list_node == delete_node) { - list_del(list_node); - return; - } - } - WARN_ON(1); -} - static int chnl_recv_cb(struct cflayer *layr, struct cfpkt *pkt) { struct sk_buff *skb; @@ -364,6 +350,7 @@ static int chnl_net_init(struct net_device *dev) ASSERT_RTNL(); priv = netdev_priv(dev); strncpy(priv->name, dev->name, sizeof(priv->name)); + INIT_LIST_HEAD(&priv->list_field); return 0; } @@ -372,7 +359,7 @@ static void chnl_net_uninit(struct net_device *dev) struct chnl_net *priv; ASSERT_RTNL(); priv = netdev_priv(dev); - robust_list_del(&priv->list_field); + list_del_init(&priv->list_field); } static const struct net_device_ops netdev_ops = { @@ -537,7 +524,7 @@ static void __exit chnl_exit_module(void) rtnl_lock(); list_for_each_safe(list_node, _tmp, &chnl_net_list) { dev = list_entry(list_node, struct chnl_net, list_field); - list_del(list_node); + list_del_init(list_node); delete_device(dev); } rtnl_unlock(); diff --git a/net/core/dev.c b/net/core/dev.c index 74fd402d26dd..7ee9fecd3aff 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6923,12 +6923,16 @@ EXPORT_SYMBOL(napi_disable); */ void napi_enable(struct napi_struct *n) { - BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state)); - smp_mb__before_atomic(); - clear_bit(NAPI_STATE_SCHED, &n->state); - clear_bit(NAPI_STATE_NPSVC, &n->state); - if (n->dev->threaded && n->thread) - set_bit(NAPI_STATE_THREADED, &n->state); + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + BUG_ON(!test_bit(NAPI_STATE_SCHED, &val)); + + new = val & ~(NAPIF_STATE_SCHED | NAPIF_STATE_NPSVC); + if (n->dev->threaded && n->thread) + new |= NAPIF_STATE_THREADED; + } while (cmpxchg(&n->state, val, new) != val); } EXPORT_SYMBOL(napi_enable); diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index b49c57d35a88..1a6a86693b74 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -71,11 +71,8 @@ static int update_classid_sock(const void *v, struct file *file, unsigned n) struct update_classid_context *ctx = (void *)v; struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + if (sock) sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, ctx->classid); - spin_unlock(&cgroup_sk_update_lock); - } if (--ctx->batch == 0) { ctx->batch = UPDATE_CLASSID_BATCH; return n + 1; @@ -121,8 +118,6 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, struct css_task_iter it; struct task_struct *p; - cgroup_sk_alloc_disable(); - cs->classid = (u32)value; css_task_iter_start(css, 0, &it); diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 99a431c56f23..8456dfbe2eb4 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -207,8 +207,6 @@ static ssize_t write_priomap(struct kernfs_open_file *of, if (!dev) return -ENODEV; - cgroup_sk_alloc_disable(); - rtnl_lock(); ret = netprio_set_prio(of_css(of), dev, prio); @@ -221,12 +219,10 @@ static ssize_t write_priomap(struct kernfs_open_file *of, static int update_netprio(const void *v, struct 
file *file, unsigned n) { struct socket *sock = sock_from_file(file); - if (sock) { - spin_lock(&cgroup_sk_update_lock); + + if (sock) sock_cgroup_set_prioidx(&sock->sk->sk_cgrp_data, (unsigned long)v); - spin_unlock(&cgroup_sk_update_lock); - } return 0; } @@ -235,8 +231,6 @@ static void net_prio_attach(struct cgroup_taskset *tset) struct task_struct *p; struct cgroup_subsys_state *css; - cgroup_sk_alloc_disable(); - cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9e5a3249373c..a3d74e2704c4 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3602,7 +3602,6 @@ out: static int pktgen_thread_worker(void *arg) { - DEFINE_WAIT(wait); struct pktgen_thread *t = arg; struct pktgen_dev *pkt_dev = NULL; int cpu = t->cpu; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f9311762cc47..2170bea2c7de 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3884,7 +3884,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb, skb_push(nskb, -skb_network_offset(nskb) + offset); skb_release_head_state(nskb); - __copy_skb_header(nskb, skb); + __copy_skb_header(nskb, skb); skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb)); skb_copy_from_linear_data_offset(skb, -tnl_hlen, diff --git a/net/core/sock.c b/net/core/sock.c index 62627e868e03..512e629f9780 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3179,17 +3179,15 @@ EXPORT_SYMBOL(sock_init_data); void lock_sock_nested(struct sock *sk, int subclass) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); if (sk->sk_lock.owned) __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(lock_sock_nested); @@ -3227,24 +3225,35 @@ EXPORT_SYMBOL(release_sock); */ bool lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) { + /* The sk_lock has mutex_lock() semantics here. */ + mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); + might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sk->sk_lock.owned) + if (!sk->sk_lock.owned) { /* - * Note : We must disable BH + * Fast path return with bottom halves disabled and + * sock::sk_lock.slock held. + * + * The 'mutex' is not contended and holding + * sock::sk_lock.slock prevents all other lockers to + * proceed so the corresponding unlock_sock_fast() can + * avoid the slow path of release_sock() completely and + * just release slock. + * + * From a semantical POV this is equivalent to 'acquiring' + * the 'mutex', hence the corresponding lockdep + * mutex_release() has to happen in the fast path of + * unlock_sock_fast(). 
*/ return false; + } __lock_sock(sk); sk->sk_lock.owned = 1; - spin_unlock(&sk->sk_lock.slock); - /* - * The sk_lock has mutex_lock() semantics here: - */ - mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_); __acquire(&sk->sk_lock.slock); - local_bh_enable(); + spin_unlock_bh(&sk->sk_lock.slock); return true; } EXPORT_SYMBOL(lock_sock_fast); diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c index c5c74a34d139..91e7a2202697 100644 --- a/net/dccp/minisocks.c +++ b/net/dccp/minisocks.c @@ -94,6 +94,8 @@ struct sock *dccp_create_openreq_child(const struct sock *sk, newdp->dccps_role = DCCP_ROLE_SERVER; newdp->dccps_hc_rx_ackvec = NULL; newdp->dccps_service_list = NULL; + newdp->dccps_hc_rx_ccid = NULL; + newdp->dccps_hc_tx_ccid = NULL; newdp->dccps_service = dreq->dreq_service; newdp->dccps_timestamp_echo = dreq->dreq_timestamp_echo; newdp->dccps_timestamp_time = dreq->dreq_timestamp_time; diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1dc45e40f961..41f36ad8b0ec 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -345,6 +345,11 @@ bool dsa_schedule_work(struct work_struct *work) return queue_work(dsa_owq, work); } +void dsa_flush_workqueue(void) +{ + flush_workqueue(dsa_owq); +} + int dsa_devlink_param_get(struct devlink *dl, u32 id, struct devlink_param_gset_ctx *ctx) { diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 1b2b25d7bd02..b29262eee00b 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -429,6 +429,7 @@ static int dsa_port_setup(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; bool dsa_port_link_registered = false; + struct dsa_switch *ds = dp->ds; bool dsa_port_enabled = false; int err = 0; @@ -438,6 +439,12 @@ static int dsa_port_setup(struct dsa_port *dp) INIT_LIST_HEAD(&dp->fdbs); INIT_LIST_HEAD(&dp->mdbs); + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + switch (dp->type) { case DSA_PORT_TYPE_UNUSED: dsa_port_disable(dp); @@ -480,8 +487,11 @@ static int dsa_port_setup(struct dsa_port *dp) dsa_port_disable(dp); if (err && dsa_port_link_registered) dsa_port_link_unregister_of(dp); - if (err) + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); return err; + } dp->setup = true; @@ -533,11 +543,15 @@ static int dsa_port_devlink_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; struct dsa_mac_addr *a, *tmp; if (!dp->setup) return; + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + devlink_port_type_clear(dlp); switch (dp->type) { @@ -581,6 +595,36 @@ static void dsa_port_devlink_teardown(struct dsa_port *dp) dp->devlink_port_setup = false; } +/* Destroy the current devlink port, and create a new one which has the UNUSED + * flavour. At this point, any call to ds->ops->port_setup has been already + * balanced out by a call to ds->ops->port_teardown, so we know that any + * devlink port regions the driver had are now unregistered. We then call its + * ds->ops->port_setup again, in order for the driver to re-create them on the + * new devlink port. + */ +static int dsa_port_reinit_as_unused(struct dsa_port *dp) +{ + struct dsa_switch *ds = dp->ds; + int err; + + dsa_port_devlink_teardown(dp); + dp->type = DSA_PORT_TYPE_UNUSED; + err = dsa_port_devlink_setup(dp); + if (err) + return err; + + if (ds->ops->port_setup) { + /* On error, leave the devlink port registered, + * dsa_switch_teardown will clean it up later. 
+ */ + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + return 0; +} + static int dsa_devlink_info_get(struct devlink *dl, struct devlink_info_req *req, struct netlink_ext_ack *extack) @@ -836,7 +880,7 @@ static int dsa_switch_setup(struct dsa_switch *ds) devlink_params_publish(ds->devlink); if (!ds->slave_mii_bus && ds->ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); + ds->slave_mii_bus = mdiobus_alloc(); if (!ds->slave_mii_bus) { err = -ENOMEM; goto teardown; @@ -846,13 +890,16 @@ static int dsa_switch_setup(struct dsa_switch *ds) err = mdiobus_register(ds->slave_mii_bus); if (err < 0) - goto teardown; + goto free_slave_mii_bus; } ds->setup = true; return 0; +free_slave_mii_bus: + if (ds->slave_mii_bus && ds->ops->phy_read) + mdiobus_free(ds->slave_mii_bus); teardown: if (ds->ops->teardown) ds->ops->teardown(ds); @@ -877,8 +924,11 @@ static void dsa_switch_teardown(struct dsa_switch *ds) if (!ds->setup) return; - if (ds->slave_mii_bus && ds->ops->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) { mdiobus_unregister(ds->slave_mii_bus); + mdiobus_free(ds->slave_mii_bus); + ds->slave_mii_bus = NULL; + } dsa_switch_unregister_notifier(ds); @@ -897,6 +947,33 @@ static void dsa_switch_teardown(struct dsa_switch *ds) ds->setup = false; } +/* First tear down the non-shared, then the shared ports. This ensures that + * all work items scheduled by our switchdev handlers for user ports have + * completed before we destroy the refcounting kept on the shared ports. + */ +static void dsa_tree_teardown_ports(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) + dsa_port_teardown(dp); + + dsa_flush_workqueue(); + + list_for_each_entry(dp, &dst->ports, list) + if (dsa_port_is_dsa(dp) || dsa_port_is_cpu(dp)) + dsa_port_teardown(dp); +} + +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) +{ + struct dsa_port *dp; + + list_for_each_entry(dp, &dst->ports, list) + dsa_switch_teardown(dp->ds); +} + static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -911,38 +988,22 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); if (err) { - dsa_port_devlink_teardown(dp); - dp->type = DSA_PORT_TYPE_UNUSED; - err = dsa_port_devlink_setup(dp); + err = dsa_port_reinit_as_unused(dp); if (err) goto teardown; - continue; } } return 0; teardown: - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); + dsa_tree_teardown_ports(dst); - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); + dsa_tree_teardown_switches(dst); return err; } -static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) -{ - struct dsa_port *dp; - - list_for_each_entry(dp, &dst->ports, list) - dsa_port_teardown(dp); - - list_for_each_entry(dp, &dst->ports, list) - dsa_switch_teardown(dp->ds); -} - static int dsa_tree_setup_master(struct dsa_switch_tree *dst) { struct dsa_port *dp; @@ -1034,6 +1095,7 @@ static int dsa_tree_setup(struct dsa_switch_tree *dst) teardown_master: dsa_tree_teardown_master(dst); teardown_switches: + dsa_tree_teardown_ports(dst); dsa_tree_teardown_switches(dst); teardown_cpu_ports: dsa_tree_teardown_cpu_ports(dst); @@ -1052,6 +1114,8 @@ static void dsa_tree_teardown(struct dsa_switch_tree *dst) dsa_tree_teardown_master(dst); + dsa_tree_teardown_ports(dst); + dsa_tree_teardown_switches(dst); 
dsa_tree_teardown_cpu_ports(dst); @@ -1546,3 +1610,53 @@ void dsa_unregister_switch(struct dsa_switch *ds) mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); + +/* If the DSA master chooses to unregister its net_device on .shutdown, DSA is + * blocking that operation from completion, due to the dev_hold taken inside + * netdev_upper_dev_link. Unlink the DSA slave interfaces from being uppers of + * the DSA master, so that the system can reboot successfully. + */ +void dsa_switch_shutdown(struct dsa_switch *ds) +{ + struct net_device *master, *slave_dev; + LIST_HEAD(unregister_list); + struct dsa_port *dp; + + mutex_lock(&dsa2_mutex); + rtnl_lock(); + + list_for_each_entry(dp, &ds->dst->ports, list) { + if (dp->ds != ds) + continue; + + if (!dsa_port_is_user(dp)) + continue; + + master = dp->cpu_dp->master; + slave_dev = dp->slave; + + netdev_upper_dev_unlink(master, slave_dev); + /* Just unlinking ourselves as uppers of the master is not + * sufficient. When the master net device unregisters, that will + * also call dev_close, which we will catch as NETDEV_GOING_DOWN + * and trigger a dev_close on our own devices (dsa_slave_close). + * In turn, that will call dev_mc_unsync on the master's net + * device. If the master is also a DSA switch port, this will + * trigger dsa_slave_set_rx_mode which will call dev_mc_sync on + * its own master. Lockdep will complain about the fact that + * all cascaded masters have the same dsa_master_addr_list_lock_key, + * which it normally would not do if the cascaded masters would + * be in a proper upper/lower relationship, which we've just + * destroyed. + * To suppress the lockdep warnings, let's actually unregister + * the DSA slave interfaces too, to avoid the nonsensical + * multicast address list synchronization on shutdown. 
+ */ + unregister_netdevice_queue(slave_dev, &unregister_list); + } + unregister_netdevice_many(&unregister_list); + + rtnl_unlock(); + mutex_unlock(&dsa2_mutex); +} +EXPORT_SYMBOL_GPL(dsa_switch_shutdown); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 33ab7d7af9eb..a5c9bc7b66c6 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -170,6 +170,7 @@ void dsa_tag_driver_put(const struct dsa_device_ops *ops); const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); bool dsa_schedule_work(struct work_struct *work); +void dsa_flush_workqueue(void); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 662ff531d4e2..a2bf2d8ac65b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1854,13 +1854,11 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) * use the switch internal MDIO bus instead */ ret = dsa_slave_phy_connect(slave_dev, dp->index, phy_flags); - if (ret) { - netdev_err(slave_dev, - "failed to connect to port %d: %d\n", - dp->index, ret); - phylink_destroy(dp->pl); - return ret; - } + } + if (ret) { + netdev_err(slave_dev, "failed to connect to PHY: %pe\n", + ERR_PTR(ret)); + phylink_destroy(dp->pl); } return ret; diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index d37ab98e7fe1..8025ed778d33 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2019 NXP Semiconductors +/* Copyright 2019 NXP */ #include <linux/dsa/ocelot.h> #include <soc/mscc/ocelot.h> diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 3038a257ba05..59072930cb02 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -1,5 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 -/* Copyright 2020-2021 NXP Semiconductors +/* Copyright 2020-2021 NXP * * An implementation of the software-defined tag_8021q.c tagger format, which * also preserves full functionality under a vlan_filtering bridge. 
It does diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index 40811bab4d09..f920487ae145 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -54,9 +54,10 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb, p = (__be16 *)tag; *p = htons(RTL4_A_ETHERTYPE); - out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8); - /* The lower bits is the port number */ - out |= (u8)dp->index; + out = (RTL4_A_PROTOCOL_RTL8366RB << RTL4_A_PROTOCOL_SHIFT) | (2 << 8); + /* The lower bits indicate the port number */ + out |= BIT(dp->index); + p = (__be16 *)(tag + 2); *p = htons(out); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 7fbd0b532f52..099259fc826a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -465,16 +465,14 @@ void cipso_v4_doi_free(struct cipso_v4_doi *doi_def) if (!doi_def) return; - if (doi_def->map.std) { - switch (doi_def->type) { - case CIPSO_V4_MAP_TRANS: - kfree(doi_def->map.std->lvl.cipso); - kfree(doi_def->map.std->lvl.local); - kfree(doi_def->map.std->cat.cipso); - kfree(doi_def->map.std->cat.local); - kfree(doi_def->map.std); - break; - } + switch (doi_def->type) { + case CIPSO_V4_MAP_TRANS: + kfree(doi_def->map.std->lvl.cipso); + kfree(doi_def->map.std->lvl.local); + kfree(doi_def->map.std->cat.cipso); + kfree(doi_def->map.std->cat.local); + kfree(doi_def->map.std); + break; } kfree(doi_def); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 177d26d8fb9c..0fe6c936dc54 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -473,8 +473,6 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev, static int gre_handle_offloads(struct sk_buff *skb, bool csum) { - if (csum && skb_checksum_start(skb) < skb->data) - return -EINVAL; return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } @@ -632,15 +630,20 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb, } if (dev->header_ops) { + const int pull_len = tunnel->hlen + sizeof(struct iphdr); + if (skb_cow_head(skb, 0)) goto free_skb; tnl_params = (const struct iphdr *)skb->data; + if (pull_len > skb_transport_offset(skb)) + goto free_skb; + /* Pull skb since ip_tunnel_xmit() needs skb->data pointing * to gre header. */ - skb_pull(skb, tunnel->hlen + sizeof(struct iphdr)); + skb_pull(skb, pull_len); skb_reset_mac_header(skb); } else { if (skb_cow_head(skb, dev->needed_headroom)) diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 4075230b14c6..9e8100728d46 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -1982,6 +1982,8 @@ static int replace_nexthop_grp(struct net *net, struct nexthop *old, rcu_assign_pointer(old->nh_grp, newg); if (newg->resilient) { + /* Make sure concurrent readers are not using 'oldg' anymore. */ + synchronize_net(); rcu_assign_pointer(oldg->res_table, tmp_table); rcu_assign_pointer(oldg->spare->res_table, tmp_table); } @@ -2490,6 +2492,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh, .fc_gw4 = cfg->gw.ipv4, .fc_gw_family = cfg->gw.ipv4 ? 
AF_INET : 0, .fc_flags = cfg->nh_flags, + .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, }; @@ -2528,6 +2531,7 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, .fc_ifindex = cfg->nh_ifindex, .fc_gateway = cfg->gw.ipv6, .fc_flags = cfg->nh_flags, + .fc_nlinfo = cfg->nlinfo, .fc_encap = cfg->nh_encap, .fc_encap_type = cfg->nh_encap_type, .fc_is_fdb = cfg->nh_fdb, @@ -3563,6 +3567,7 @@ static struct notifier_block nh_netdev_notifier = { }; static int nexthops_dump(struct net *net, struct notifier_block *nb, + enum nexthop_event_type event_type, struct netlink_ext_ack *extack) { struct rb_root *root = &net->nexthop.rb_root; @@ -3573,8 +3578,7 @@ static int nexthops_dump(struct net *net, struct notifier_block *nb, struct nexthop *nh; nh = rb_entry(node, struct nexthop, rb_node); - err = call_nexthop_notifier(nb, net, NEXTHOP_EVENT_REPLACE, nh, - extack); + err = call_nexthop_notifier(nb, net, event_type, nh, extack); if (err) break; } @@ -3588,7 +3592,7 @@ int register_nexthop_notifier(struct net *net, struct notifier_block *nb, int err; rtnl_lock(); - err = nexthops_dump(net, nb, extack); + err = nexthops_dump(net, nb, NEXTHOP_EVENT_REPLACE, extack); if (err) goto unlock; err = blocking_notifier_chain_register(&net->nexthop.notifier_chain, @@ -3601,8 +3605,17 @@ EXPORT_SYMBOL(register_nexthop_notifier); int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb) { - return blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, - nb); + int err; + + rtnl_lock(); + err = blocking_notifier_chain_unregister(&net->nexthop.notifier_chain, + nb); + if (err) + goto unlock; + nexthops_dump(net, nb, NEXTHOP_EVENT_DEL, NULL); +unlock: + rtnl_unlock(); + return err; } EXPORT_SYMBOL(unregister_nexthop_notifier); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3f7bd7ae7d7a..141e85e6422b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1346,7 +1346,7 @@ static u8 tcp_sacktag_one(struct sock *sk, if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) - tp->undo_retrans--; + tp->undo_retrans = max_t(int, 0, tp->undo_retrans - pcount); if ((sacked & TCPCB_SACKED_ACKED) && before(start_seq, state->reord)) state->reord = start_seq; diff --git a/net/ipv4/udp_tunnel_nic.c b/net/ipv4/udp_tunnel_nic.c index 0d122edc368d..b91003538d87 100644 --- a/net/ipv4/udp_tunnel_nic.c +++ b/net/ipv4/udp_tunnel_nic.c @@ -935,7 +935,7 @@ static int __init udp_tunnel_nic_init_module(void) { int err; - udp_tunnel_nic_workqueue = alloc_workqueue("udp_tunnel_nic", 0, 0); + udp_tunnel_nic_workqueue = alloc_ordered_workqueue("udp_tunnel_nic", 0); if (!udp_tunnel_nic_workqueue) return -ENOMEM; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 17756f3ed33b..c6a90b7bbb70 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3092,19 +3092,22 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, } } -#if IS_ENABLED(CONFIG_IPV6_SIT) -static void sit_add_v4_addrs(struct inet6_dev *idev) +#if IS_ENABLED(CONFIG_IPV6_SIT) || IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) +static void add_v4_addrs(struct inet6_dev *idev) { struct in6_addr addr; struct net_device *dev; struct net *net = dev_net(idev->dev); - int scope, plen; + int scope, plen, offset = 0; u32 pflags = 0; ASSERT_RTNL(); memset(&addr, 0, sizeof(struct in6_addr)); - memcpy(&addr.s6_addr32[3], idev->dev->dev_addr, 4); + /* in case of IP6GRE the 
dev_addr is an IPv6 and therefore we use only the last 4 bytes */ + if (idev->dev->addr_len == sizeof(struct in6_addr)) + offset = sizeof(struct in6_addr) - 4; + memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4); if (idev->dev->flags&IFF_POINTOPOINT) { addr.s6_addr32[0] = htonl(0xfe800000); @@ -3342,8 +3345,6 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_IEEE1394) && (dev->type != ARPHRD_TUNNEL6) && (dev->type != ARPHRD_6LOWPAN) && - (dev->type != ARPHRD_IP6GRE) && - (dev->type != ARPHRD_IPGRE) && (dev->type != ARPHRD_TUNNEL) && (dev->type != ARPHRD_NONE) && (dev->type != ARPHRD_RAWIP)) { @@ -3391,14 +3392,14 @@ static void addrconf_sit_config(struct net_device *dev) return; } - sit_add_v4_addrs(idev); + add_v4_addrs(idev); if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); } #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) +#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) static void addrconf_gre_config(struct net_device *dev) { struct inet6_dev *idev; @@ -3411,7 +3412,13 @@ static void addrconf_gre_config(struct net_device *dev) return; } - addrconf_addr_gen(idev, true); + if (dev->type == ARPHRD_ETHER) { + addrconf_addr_gen(idev, true); + return; + } + + add_v4_addrs(idev); + if (dev->flags & IFF_POINTOPOINT) addrconf_add_mroute(dev); } @@ -3587,7 +3594,8 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, addrconf_sit_config(dev); break; #endif -#if IS_ENABLED(CONFIG_NET_IPGRE) +#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE) + case ARPHRD_IP6GRE: case ARPHRD_IPGRE: addrconf_gre_config(dev); break; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1bec5b22f80d..0371d2c14145 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1378,7 +1378,6 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, int err = -ENOMEM; int allow_create = 1; int replace_required = 0; - int sernum = fib6_new_sernum(info->nl_net); if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -1478,7 +1477,7 @@ int fib6_add(struct fib6_node *root, struct fib6_info *rt, if (!err) { if (rt->nh) list_add(&rt->nh_list, &rt->nh->f6i_list); - __fib6_update_sernum_upto_root(rt, sernum); + __fib6_update_sernum_upto_root(rt, fib6_new_sernum(info->nl_net)); fib6_start_gc(info->nl_net, rt); } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 7baf41d160f5..3ad201d372d8 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -629,8 +629,6 @@ drop: static int gre_handle_offloads(struct sk_buff *skb, bool csum) { - if (csum && skb_checksum_start(skb) < skb->data) - return -EINVAL; return iptunnel_handle_offloads(skb, csum ? 
SKB_GSO_GRE_CSUM : SKB_GSO_GRE); } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index cd951faa2fac..bed8155508c8 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1356,8 +1356,8 @@ static int mld_process_v1(struct inet6_dev *idev, struct mld_msg *mld, return 0; } -static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, - unsigned long *max_delay) +static void mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, + unsigned long *max_delay) { *max_delay = max(msecs_to_jiffies(mldv2_mrc(mld)), 1UL); @@ -1367,7 +1367,7 @@ static int mld_process_v2(struct inet6_dev *idev, struct mld2_query *mld, idev->mc_maxdelay = *max_delay; - return 0; + return; } /* called with rcu_read_lock() */ @@ -1454,9 +1454,7 @@ static void __mld_query_work(struct sk_buff *skb) mlh2 = (struct mld2_query *)skb_transport_header(skb); - err = mld_process_v2(idev, mlh2, &max_delay); - if (err < 0) - goto out; + mld_process_v2(idev, mlh2, &max_delay); if (group_type == IPV6_ADDR_ANY) { /* general query */ if (mlh2->mld2q_nsrcs) diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index 6fd54744cbc3..aa5bb8789ba0 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -99,7 +99,7 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, { __be16 dport, sport; const struct in6_addr *daddr = NULL, *saddr = NULL; - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = ipv6_hdr(skb), ipv6_var; struct sk_buff *data_skb = NULL; int doff = 0; int thoff = 0, tproto; @@ -129,8 +129,6 @@ struct sock *nf_sk_lookup_slow_v6(struct net *net, const struct sk_buff *skb, thoff + sizeof(*hp); } else if (tproto == IPPROTO_ICMPV6) { - struct ipv6hdr ipv6_var; - if (extract_icmp6_fields(skb, thoff, &tproto, &saddr, &daddr, &sport, &dport, &ipv6_var)) return NULL; diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 1bf5f5ae75ac..3adc5d9211ad 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -385,7 +385,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, struct dst_entry *orig_dst = skb_dst(skb); struct dst_entry *dst = NULL; struct seg6_lwt *slwt; - int err = -EINVAL; + int err; err = seg6_do_srh(skb); if (unlikely(err)) diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 53486b162f01..93271a2632b8 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -869,8 +869,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb) } if (tunnel->version == L2TP_HDR_VER_3 && - l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) + l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr)) { + l2tp_session_dec_refcount(session); goto invalid; + } l2tp_recv_common(session, skb, ptr, optr, hdrflags, length); l2tp_session_dec_refcount(session); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 1cf5ac09edcb..323d3d2d986f 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -617,7 +617,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, { struct net_device *ndev = NULL; struct ieee802154_sub_if_data *sdata = NULL; - int ret = -ENOMEM; + int ret; ASSERT_RTNL(); diff --git a/net/mctp/route.c b/net/mctp/route.c index 5265525011ad..5ca186d53cb0 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -1083,8 +1083,10 @@ static void __net_exit mctp_routes_net_exit(struct net *net) { struct mctp_route *rt; + rcu_read_lock(); list_for_each_entry_rcu(rt, &net->mctp.routes, list) 
mctp_route_release(rt); + rcu_read_unlock(); } static struct pernet_operations mctp_net_ops = { diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 1e4289c507ff..c4f9a5ce3815 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -644,15 +644,12 @@ void mptcp_pm_nl_addr_send_ack(struct mptcp_sock *msk) subflow = list_first_entry_or_null(&msk->conn_list, typeof(*subflow), node); if (subflow) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow; spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for %s", mptcp_pm_should_add_signal(msk) ? "add_addr" : "rm_addr"); - slow = lock_sock_fast(ssk); - tcp_send_ack(ssk); - unlock_sock_fast(ssk, slow); + mptcp_subflow_send_ack(ssk); spin_lock_bh(&msk->pm.lock); } } @@ -669,7 +666,6 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, struct sock *ssk = mptcp_subflow_tcp_sock(subflow); struct sock *sk = (struct sock *)msk; struct mptcp_addr_info local; - bool slow; local_address((struct sock_common *)ssk, &local); if (!addresses_equal(&local, addr, addr->port)) @@ -682,9 +678,7 @@ int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk, spin_unlock_bh(&msk->pm.lock); pr_debug("send ack for mp_prio"); - slow = lock_sock_fast(ssk); - tcp_send_ack(ssk); - unlock_sock_fast(ssk, slow); + mptcp_subflow_send_ack(ssk); spin_lock_bh(&msk->pm.lock); return 0; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index ade648c3512b..dbcebf56798f 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -440,19 +440,22 @@ static bool tcp_can_send_ack(const struct sock *ssk) (TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN)); } +void mptcp_subflow_send_ack(struct sock *ssk) +{ + bool slow; + + slow = lock_sock_fast(ssk); + if (tcp_can_send_ack(ssk)) + tcp_send_ack(ssk); + unlock_sock_fast(ssk, slow); +} + static void mptcp_send_ack(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - bool slow; - - slow = lock_sock_fast(ssk); - if (tcp_can_send_ack(ssk)) - tcp_send_ack(ssk); - unlock_sock_fast(ssk, slow); - } + mptcp_for_each_subflow(msk, subflow) + mptcp_subflow_send_ack(mptcp_subflow_tcp_sock(subflow)); } static void mptcp_subflow_cleanup_rbuf(struct sock *ssk) @@ -1003,6 +1006,13 @@ static void mptcp_wmem_uncharge(struct sock *sk, int size) msk->wmem_reserved += size; } +static void __mptcp_mem_reclaim_partial(struct sock *sk) +{ + lockdep_assert_held_once(&sk->sk_lock.slock); + __mptcp_update_wmem(sk); + sk_mem_reclaim_partial(sk); +} + static void mptcp_mem_reclaim_partial(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -1094,12 +1104,8 @@ static void __mptcp_clean_una(struct sock *sk) msk->recovery = false; out: - if (cleaned) { - if (tcp_under_memory_pressure(sk)) { - __mptcp_update_wmem(sk); - sk_mem_reclaim_partial(sk); - } - } + if (cleaned && tcp_under_memory_pressure(sk)) + __mptcp_mem_reclaim_partial(sk); if (snd_una == READ_ONCE(msk->snd_nxt) && !msk->recovery) { if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) @@ -1179,6 +1185,7 @@ struct mptcp_sendmsg_info { u16 limit; u16 sent; unsigned int flags; + bool data_lock_held; }; static int mptcp_check_allowed_size(struct mptcp_sock *msk, u64 data_seq, @@ -1250,17 +1257,17 @@ static bool __mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, gfp_t gfp) return false; } -static bool mptcp_must_reclaim_memory(struct sock *sk, struct sock *ssk) +static bool mptcp_alloc_tx_skb(struct sock *sk, 
struct sock *ssk, bool data_lock_held) { - return !ssk->sk_tx_skb_cache && - tcp_under_memory_pressure(sk); -} + gfp_t gfp = data_lock_held ? GFP_ATOMIC : sk->sk_allocation; -static bool mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk) -{ - if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) - mptcp_mem_reclaim_partial(sk); - return __mptcp_alloc_tx_skb(sk, ssk, sk->sk_allocation); + if (unlikely(tcp_under_memory_pressure(sk))) { + if (data_lock_held) + __mptcp_mem_reclaim_partial(sk); + else + mptcp_mem_reclaim_partial(sk); + } + return __mptcp_alloc_tx_skb(sk, ssk, gfp); } /* note: this always recompute the csum on the whole skb, even @@ -1284,7 +1291,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, bool zero_window_probe = false; struct mptcp_ext *mpext = NULL; struct sk_buff *skb, *tail; - bool can_collapse = false; + bool must_collapse = false; int size_bias = 0; int avail_size; size_t ret = 0; @@ -1304,16 +1311,24 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, * SSN association set here */ mpext = skb_ext_find(skb, SKB_EXT_MPTCP); - can_collapse = (info->size_goal - skb->len > 0) && - mptcp_skb_can_collapse_to(data_seq, skb, mpext); - if (!can_collapse) { + if (!mptcp_skb_can_collapse_to(data_seq, skb, mpext)) { TCP_SKB_CB(skb)->eor = 1; - } else { + goto alloc_skb; + } + + must_collapse = (info->size_goal > skb->len) && + (skb_shinfo(skb)->nr_frags < sysctl_max_skb_frags); + if (must_collapse) { size_bias = skb->len; avail_size = info->size_goal - skb->len; } } +alloc_skb: + if (!must_collapse && + !mptcp_alloc_tx_skb(sk, ssk, info->data_lock_held)) + return 0; + /* Zero window and all data acked? Probe. */ avail_size = mptcp_check_allowed_size(msk, data_seq, avail_size); if (avail_size == 0) { @@ -1343,7 +1358,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, if (skb == tail) { TCP_SKB_CB(tail)->tcp_flags &= ~TCPHDR_PSH; mpext->data_len += ret; - WARN_ON_ONCE(!can_collapse); WARN_ON_ONCE(zero_window_probe); goto out; } @@ -1530,15 +1544,6 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) if (ssk != prev_ssk) lock_sock(ssk); - /* keep it simple and always provide a new skb for the - * subflow, even if we will not use it when collapsing - * on the pending one - */ - if (!mptcp_alloc_tx_skb(sk, ssk)) { - mptcp_push_release(sk, ssk, &info); - goto out; - } - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) { mptcp_push_release(sk, ssk, &info); @@ -1571,7 +1576,9 @@ out: static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) { struct mptcp_sock *msk = mptcp_sk(sk); - struct mptcp_sendmsg_info info; + struct mptcp_sendmsg_info info = { + .data_lock_held = true, + }; struct mptcp_data_frag *dfrag; struct sock *xmit_ssk; int len, copied = 0; @@ -1597,13 +1604,6 @@ static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk) goto out; } - if (unlikely(mptcp_must_reclaim_memory(sk, ssk))) { - __mptcp_update_wmem(sk); - sk_mem_reclaim_partial(sk); - } - if (!__mptcp_alloc_tx_skb(sk, ssk, GFP_ATOMIC)) - goto out; - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) goto out; @@ -2409,9 +2409,6 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? 
dfrag->data_len : dfrag->already_sent; while (info.sent < info.limit) { - if (!mptcp_alloc_tx_skb(sk, ssk)) - break; - ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) break; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d7aba1c4dc48..d3e6fd1615f1 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -34,7 +34,7 @@ #define OPTIONS_MPTCP_MPC (OPTION_MPTCP_MPC_SYN | OPTION_MPTCP_MPC_SYNACK | \ OPTION_MPTCP_MPC_ACK) #define OPTIONS_MPTCP_MPJ (OPTION_MPTCP_MPJ_SYN | OPTION_MPTCP_MPJ_SYNACK | \ - OPTION_MPTCP_MPJ_SYNACK) + OPTION_MPTCP_MPJ_ACK) /* MPTCP option subtypes */ #define MPTCPOPT_MP_CAPABLE 0 @@ -573,6 +573,7 @@ void __init mptcp_subflow_init(void); void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how); void mptcp_close_ssk(struct sock *sk, struct sock *ssk, struct mptcp_subflow_context *subflow); +void mptcp_subflow_send_ack(struct sock *ssk); void mptcp_subflow_reset(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 0b6cfd3b31e0..03757e76bb6b 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -80,6 +80,7 @@ enum { #define NCSI_OEM_MFR_BCM_ID 0x113d #define NCSI_OEM_MFR_INTEL_ID 0x157 /* Intel specific OEM command */ +#define NCSI_OEM_INTEL_CMD_GMA 0x06 /* CMD ID for Get MAC */ #define NCSI_OEM_INTEL_CMD_KEEP_PHY 0x20 /* CMD ID for Keep PHY up */ /* Broadcom specific OEM Command */ #define NCSI_OEM_BCM_CMD_GMA 0x01 /* CMD ID for Get MAC */ @@ -89,6 +90,7 @@ enum { #define NCSI_OEM_MLX_CMD_SMAF 0x01 /* CMD ID for Set MC Affinity */ #define NCSI_OEM_MLX_CMD_SMAF_PARAM 0x07 /* Parameter for SMAF */ /* OEM Command payload lengths*/ +#define NCSI_OEM_INTEL_CMD_GMA_LEN 5 #define NCSI_OEM_INTEL_CMD_KEEP_PHY_LEN 7 #define NCSI_OEM_BCM_CMD_GMA_LEN 12 #define NCSI_OEM_MLX_CMD_GMA_LEN 8 @@ -99,6 +101,7 @@ enum { /* Mac address offset in OEM response */ #define BCM_MAC_ADDR_OFFSET 28 #define MLX_MAC_ADDR_OFFSET 8 +#define INTEL_MAC_ADDR_OFFSET 1 struct ncsi_channel_version { diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index 89c7742cd72e..7121ce2a47c0 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -795,13 +795,36 @@ static int ncsi_oem_smaf_mlx(struct ncsi_cmd_arg *nca) return ret; } +static int ncsi_oem_gma_handler_intel(struct ncsi_cmd_arg *nca) +{ + unsigned char data[NCSI_OEM_INTEL_CMD_GMA_LEN]; + int ret = 0; + + nca->payload = NCSI_OEM_INTEL_CMD_GMA_LEN; + + memset(data, 0, NCSI_OEM_INTEL_CMD_GMA_LEN); + *(unsigned int *)data = ntohl((__force __be32)NCSI_OEM_MFR_INTEL_ID); + data[4] = NCSI_OEM_INTEL_CMD_GMA; + + nca->data = data; + + ret = ncsi_xmit_cmd(nca); + if (ret) + netdev_err(nca->ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during configure\n", + nca->type); + + return ret; +} + /* OEM Command handlers initialization */ static struct ncsi_oem_gma_handler { unsigned int mfr_id; int (*handler)(struct ncsi_cmd_arg *nca); } ncsi_oem_gma_handlers[] = { { NCSI_OEM_MFR_BCM_ID, ncsi_oem_gma_handler_bcm }, - { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx } + { NCSI_OEM_MFR_MLX_ID, ncsi_oem_gma_handler_mlx }, + { NCSI_OEM_MFR_INTEL_ID, ncsi_oem_gma_handler_intel } }; static int ncsi_gma_handler(struct ncsi_cmd_arg *nca, unsigned int mf_id) diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 80938b338fee..ba66c7dc3a21 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -178,6 +178,12 @@ struct 
ncsi_rsp_oem_bcm_pkt { unsigned char data[]; /* Cmd specific Data */ }; +/* Intel Response Data */ +struct ncsi_rsp_oem_intel_pkt { + unsigned char cmd; /* OEM Command ID */ + unsigned char data[]; /* Cmd specific Data */ +}; + /* Get Link Status */ struct ncsi_rsp_gls_pkt { struct ncsi_rsp_pkt_hdr rsp; /* Response header */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index d48374894817..6447a09932f5 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -699,9 +699,51 @@ static int ncsi_rsp_handler_oem_bcm(struct ncsi_request *nr) return 0; } +/* Response handler for Intel command Get Mac Address */ +static int ncsi_rsp_handler_oem_intel_gma(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct net_device *ndev = ndp->ndev.dev; + const struct net_device_ops *ops = ndev->netdev_ops; + struct ncsi_rsp_oem_pkt *rsp; + struct sockaddr saddr; + int ret = 0; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + + saddr.sa_family = ndev->type; + ndev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + memcpy(saddr.sa_data, &rsp->data[INTEL_MAC_ADDR_OFFSET], ETH_ALEN); + /* Increase mac address by 1 for BMC's address */ + eth_addr_inc((u8 *)saddr.sa_data); + if (!is_valid_ether_addr((const u8 *)saddr.sa_data)) + return -ENXIO; + + /* Set the flag for GMA command which should only be called once */ + ndp->gma_flag = 1; + + ret = ops->ndo_set_mac_address(ndev, &saddr); + if (ret < 0) + netdev_warn(ndev, + "NCSI: 'Writing mac address to device failed\n"); + + return ret; +} + /* Response handler for Intel card */ static int ncsi_rsp_handler_oem_intel(struct ncsi_request *nr) { + struct ncsi_rsp_oem_intel_pkt *intel; + struct ncsi_rsp_oem_pkt *rsp; + + /* Get the response header */ + rsp = (struct ncsi_rsp_oem_pkt *)skb_network_header(nr->rsp); + intel = (struct ncsi_rsp_oem_intel_pkt *)(rsp->data); + + if (intel->cmd == NCSI_OEM_INTEL_CMD_GMA) + return ncsi_rsp_handler_oem_intel_gma(nr); + return 0; } diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index d31dbccbe7bd..94e18fb9690d 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -21,7 +21,6 @@ #include <linux/stddef.h> #include <linux/slab.h> #include <linux/random.h> -#include <linux/jhash.h> #include <linux/siphash.h> #include <linux/err.h> #include <linux/percpu.h> @@ -78,6 +77,8 @@ static __read_mostly bool nf_conntrack_locks_all; #define GC_SCAN_INTERVAL (120u * HZ) #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) +#define MAX_CHAINLEN 64u + static struct conntrack_gc_work conntrack_gc_work; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) @@ -184,25 +185,31 @@ EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_max); seqcount_spinlock_t nf_conntrack_generation __read_mostly; -static unsigned int nf_conntrack_hash_rnd __read_mostly; +static siphash_key_t nf_conntrack_hash_rnd __read_mostly; static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, const struct net *net) { - unsigned int n; - u32 seed; + struct { + struct nf_conntrack_man src; + union nf_inet_addr dst_addr; + u32 net_mix; + u16 dport; + u16 proto; + } __aligned(SIPHASH_ALIGNMENT) combined; get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd)); - /* The direction must be ignored, so we hash everything up to the - * destination ports (which is a multiple of 4) and treat the last - * three bytes manually. 
- */ - seed = nf_conntrack_hash_rnd ^ net_hash_mix(net); - n = (sizeof(tuple->src) + sizeof(tuple->dst.u3)) / sizeof(u32); - return jhash2((u32 *)tuple, n, seed ^ - (((__force __u16)tuple->dst.u.all << 16) | - tuple->dst.protonum)); + memset(&combined, 0, sizeof(combined)); + + /* The direction must be ignored, so handle usable members manually. */ + combined.src = tuple->src; + combined.dst_addr = tuple->dst.u3; + combined.net_mix = net_hash_mix(net); + combined.dport = (__force __u16)tuple->dst.u.all; + combined.proto = tuple->dst.protonum; + + return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd); } static u32 scale_hash(u32 hash) @@ -835,7 +842,9 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; + unsigned int chainlen = 0; unsigned int sequence; + int err = -EEXIST; zone = nf_ct_zone(ct); @@ -849,15 +858,24 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) } while (nf_conntrack_double_lock(net, hash, reply_hash, sequence)); /* See if there's one in the list already, including reverse */ - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + if (chainlen++ > MAX_CHAINLEN) + goto chaintoolong; + } + + chainlen = 0; + + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; + if (chainlen++ > MAX_CHAINLEN) + goto chaintoolong; + } smp_wmb(); /* The caller holds a reference to this object */ @@ -867,11 +885,13 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) NF_CT_STAT_INC(net, insert); local_bh_enable(); return 0; - +chaintoolong: + NF_CT_STAT_INC(net, chaintoolong); + err = -ENOSPC; out: nf_conntrack_double_unlock(hash, reply_hash); local_bh_enable(); - return -EEXIST; + return err; } EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert); @@ -1084,6 +1104,7 @@ int __nf_conntrack_confirm(struct sk_buff *skb) { const struct nf_conntrack_zone *zone; + unsigned int chainlen = 0, sequence; unsigned int hash, reply_hash; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -1091,7 +1112,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) struct hlist_nulls_node *n; enum ip_conntrack_info ctinfo; struct net *net; - unsigned int sequence; int ret = NF_DROP; ct = nf_ct_get(skb, &ctinfo); @@ -1151,15 +1171,28 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. 
*/ - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, zone, net)) goto out; + if (chainlen++ > MAX_CHAINLEN) + goto chaintoolong; + } - hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) + chainlen = 0; + hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[reply_hash], hnnode) { if (nf_ct_key_equal(h, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, zone, net)) goto out; + if (chainlen++ > MAX_CHAINLEN) { +chaintoolong: + nf_ct_add_to_dying_list(ct); + NF_CT_STAT_INC(net, chaintoolong); + NF_CT_STAT_INC(net, insert_failed); + ret = NF_DROP; + goto dying; + } + } /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in @@ -2594,26 +2627,24 @@ int nf_conntrack_init_start(void) spin_lock_init(&nf_conntrack_locks[i]); if (!nf_conntrack_htable_size) { - /* Idea from tcp.c: use 1/16384 of memory. - * On i386: 32MB machine has 512 buckets. - * >= 1GB machines have 16384 buckets. - * >= 4GB machines have 65536 buckets. - */ nf_conntrack_htable_size = (((nr_pages << PAGE_SHIFT) / 16384) / sizeof(struct hlist_head)); - if (nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) - nf_conntrack_htable_size = 65536; + if (BITS_PER_LONG >= 64 && + nr_pages > (4 * (1024 * 1024 * 1024 / PAGE_SIZE))) + nf_conntrack_htable_size = 262144; else if (nr_pages > (1024 * 1024 * 1024 / PAGE_SIZE)) - nf_conntrack_htable_size = 16384; - if (nf_conntrack_htable_size < 32) - nf_conntrack_htable_size = 32; - - /* Use a max. factor of four by default to get the same max as - * with the old struct list_heads. When a table size is given - * we use the old value of 8 to avoid reducing the max. - * entries. */ - max_factor = 4; + nf_conntrack_htable_size = 65536; + + if (nf_conntrack_htable_size < 1024) + nf_conntrack_htable_size = 1024; + /* Use a max. factor of one by default to keep the average + * hash chain length at 2 entries. Each entry has to be added + * twice (once for original direction, once for reply). + * When a table size is given we use the old value of 8 to + * avoid implicit reduction of the max entries setting. 
+ */ + max_factor = 1; } nf_conntrack_hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, 1); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 1e851bc2e61a..f562eeef4234 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -17,7 +17,7 @@ #include <linux/err.h> #include <linux/percpu.h> #include <linux/kernel.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/moduleparam.h> #include <linux/export.h> #include <net/net_namespace.h> @@ -41,7 +41,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_hash); unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static unsigned int nf_ct_expect_hashrnd __read_mostly; +static siphash_key_t nf_ct_expect_hashrnd __read_mostly; /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, @@ -81,15 +81,26 @@ static void nf_ct_expectation_timed_out(struct timer_list *t) static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple) { - unsigned int hash, seed; + struct { + union nf_inet_addr dst_addr; + u32 net_mix; + u16 dport; + u8 l3num; + u8 protonum; + } __aligned(SIPHASH_ALIGNMENT) combined; + u32 hash; get_random_once(&nf_ct_expect_hashrnd, sizeof(nf_ct_expect_hashrnd)); - seed = nf_ct_expect_hashrnd ^ net_hash_mix(n); + memset(&combined, 0, sizeof(combined)); - hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all), - (((tuple->dst.protonum ^ tuple->src.l3num) << 16) | - (__force __u16)tuple->dst.u.all) ^ seed); + combined.dst_addr = tuple->dst.u3; + combined.net_mix = net_hash_mix(n); + combined.dport = (__force __u16)tuple->dst.u.all; + combined.l3num = tuple->src.l3num; + combined.protonum = tuple->dst.protonum; + + hash = siphash(&combined, sizeof(combined), &nf_ct_expect_hashrnd); return reciprocal_scale(hash, nf_ct_expect_hsize); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 5f9fc6b94855..f1e5443fe7c7 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2528,7 +2528,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_be32(skb, CTA_STATS_SEARCH_RESTART, htonl(st->search_restart)) || nla_put_be32(skb, CTA_STATS_CLASH_RESOLVE, - htonl(st->clash_resolve))) + htonl(st->clash_resolve)) || + nla_put_be32(skb, CTA_STATS_CHAIN_TOOLONG, + htonl(st->chaintoolong))) goto nla_put_failure; nlmsg_end(skb, nlh); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 7e0d956da51d..80f675d884b2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -432,7 +432,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) unsigned int nr_conntracks; if (v == SEQ_START_TOKEN) { - seq_puts(seq, "entries clashres found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + seq_puts(seq, "entries clashres found new invalid ignore delete chainlength insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); return 0; } @@ -447,7 +447,7 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) st->invalid, 0, 0, - 0, + st->chaintoolong, st->insert, st->insert_failed, st->drop, diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 
7de595ead06a..7008961f5cb0 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -13,7 +13,7 @@ #include <linux/skbuff.h> #include <linux/gfp.h> #include <net/xfrm.h> -#include <linux/jhash.h> +#include <linux/siphash.h> #include <linux/rtnetlink.h> #include <net/netfilter/nf_conntrack.h> @@ -34,7 +34,7 @@ static unsigned int nat_net_id __read_mostly; static struct hlist_head *nf_nat_bysource __read_mostly; static unsigned int nf_nat_htable_size __read_mostly; -static unsigned int nf_nat_hash_rnd __read_mostly; +static siphash_key_t nf_nat_hash_rnd __read_mostly; struct nf_nat_lookup_hook_priv { struct nf_hook_entries __rcu *entries; @@ -153,12 +153,22 @@ static unsigned int hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) { unsigned int hash; + struct { + struct nf_conntrack_man src; + u32 net_mix; + u32 protonum; + } __aligned(SIPHASH_ALIGNMENT) combined; get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); + memset(&combined, 0, sizeof(combined)); + /* Original src, to ensure we map it consistently if poss. */ - hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), - tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); + combined.src = tuple->src; + combined.net_mix = net_hash_mix(n); + combined.protonum = tuple->dst.protonum; + + hash = siphash(&combined, sizeof(combined), &nf_nat_hash_rnd); return reciprocal_scale(hash, nf_nat_htable_size); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 337e22d8b40b..99b1de14ff7e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -41,6 +41,7 @@ struct nft_ct_helper_obj { #ifdef CONFIG_NF_CONNTRACK_ZONES static DEFINE_PER_CPU(struct nf_conn *, nft_ct_pcpu_template); static unsigned int nft_ct_pcpu_template_refcnt __read_mostly; +static DEFINE_MUTEX(nft_ct_pcpu_mutex); #endif static u64 nft_ct_get_eval_counter(const struct nf_conn_counter *c, @@ -525,8 +526,10 @@ static void __nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv) #endif #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: + mutex_lock(&nft_ct_pcpu_mutex); if (--nft_ct_pcpu_template_refcnt == 0) nft_ct_tmpl_put_pcpu(); + mutex_unlock(&nft_ct_pcpu_mutex); break; #endif default: @@ -564,9 +567,13 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, #endif #ifdef CONFIG_NF_CONNTRACK_ZONES case NFT_CT_ZONE: - if (!nft_ct_tmpl_alloc_pcpu()) + mutex_lock(&nft_ct_pcpu_mutex); + if (!nft_ct_tmpl_alloc_pcpu()) { + mutex_unlock(&nft_ct_pcpu_mutex); return -ENOMEM; + } nft_ct_pcpu_template_refcnt++; + mutex_unlock(&nft_ct_pcpu_mutex); len = sizeof(u16); break; #endif diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 543365f58e97..2a2bc64f75cf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -46,6 +46,8 @@ * Copyright (C) 2011, <lokec@ccs.neu.edu> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/ethtool.h> #include <linux/types.h> #include <linux/mm.h> diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 525e3ea063b1..ec2322529727 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -493,7 +493,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) goto err; } - if (!size || size & 3 || len != size + hdrlen) + if (!size || len != ALIGN(size, 4) + hdrlen) goto err; if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA && diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index c4afdd026f51..bb0cd6d3d2c2 100644 --- a/net/sched/sch_fq_codel.c +++ 
b/net/sched/sch_fq_codel.c @@ -369,6 +369,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, { struct fq_codel_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_FQ_CODEL_MAX + 1]; + u32 quantum = 0; int err; if (!opt) @@ -386,6 +387,13 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->flows_cnt > 65536) return -EINVAL; } + if (tb[TCA_FQ_CODEL_QUANTUM]) { + quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum > FQ_CODEL_QUANTUM_MAX) { + NL_SET_ERR_MSG(extack, "Invalid quantum"); + return -EINVAL; + } + } sch_tree_lock(sch); if (tb[TCA_FQ_CODEL_TARGET]) { @@ -412,8 +420,8 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, if (tb[TCA_FQ_CODEL_ECN]) q->cparams.ecn = !!nla_get_u32(tb[TCA_FQ_CODEL_ECN]); - if (tb[TCA_FQ_CODEL_QUANTUM]) - q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); + if (quantum) + q->quantum = quantum; if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index e286dafd6e88..6ec1ebe878ae 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -230,7 +230,8 @@ static int smc_clc_prfx_set(struct socket *clcsock, goto out_rel; } /* get address to which the internal TCP socket is bound */ - kernel_getsockname(clcsock, (struct sockaddr *)&addrs); + if (kernel_getsockname(clcsock, (struct sockaddr *)&addrs) < 0) + goto out_rel; /* analyze IP specific data of net_device belonging to TCP socket */ addr6 = (struct sockaddr_in6 *)&addrs; rcu_read_lock(); diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index af227b65669e..8280c938be80 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1474,7 +1474,9 @@ static void smc_conn_abort_work(struct work_struct *work) abort_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); + lock_sock(&smc->sk); smc_conn_kill(conn, true); + release_sock(&smc->sk); sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ } diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index d1c003a25b0f..61c276bddaf2 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -160,7 +160,7 @@ static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn) mutex_lock(&sn->gssp_lock); clnt = sn->gssp_clnt; if (clnt) - atomic_inc(&clnt->cl_count); + refcount_inc(&clnt->cl_count); mutex_unlock(&sn->gssp_lock); return clnt; } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 3d685fe328fa..3e776e3dff91 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -194,6 +194,8 @@ static void rsi_request(struct cache_detail *cd, qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); (*bpp)[-1] = '\n'; + WARN_ONCE(*blen < 0, + "RPCSEC/GSS credential too large - please use gssproxy\n"); } static int rsi_parse(struct cache_detail *cd, @@ -707,11 +709,11 @@ svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o) /* * Verify the checksum on the header and return SVC_OK on success. * Otherwise, return SVC_DROP (in the case of a bad sequence number) - * or return SVC_DENIED and indicate error in authp. + * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat. 
*/ static int gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, - __be32 *rpcstart, struct rpc_gss_wire_cred *gc, __be32 *authp) + __be32 *rpcstart, struct rpc_gss_wire_cred *gc) { struct gss_ctx *ctx_id = rsci->mechctx; struct xdr_buf rpchdr; @@ -725,7 +727,7 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; xdr_buf_from_iov(&iov, &rpchdr); - *authp = rpc_autherr_badverf; + rqstp->rq_auth_stat = rpc_autherr_badverf; if (argv->iov_len < 4) return SVC_DENIED; flavor = svc_getnl(argv); @@ -737,13 +739,13 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, if (rqstp->rq_deferred) /* skip verification of revisited request */ return SVC_OK; if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) { - *authp = rpcsec_gsserr_credproblem; + rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; return SVC_DENIED; } if (gc->gc_seq > MAXSEQ) { trace_rpcgss_svc_seqno_large(rqstp, gc->gc_seq); - *authp = rpcsec_gsserr_ctxproblem; + rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; return SVC_DENIED; } if (!gss_check_seq_num(rqstp, rsci, gc->gc_seq)) @@ -1038,6 +1040,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) struct rpc_gss_wire_cred *gc = &svcdata->clcred; int stat; + rqstp->rq_auth_stat = rpc_autherr_badcred; + /* * A gss export can be specified either by: * export *(sec=krb5,rw) @@ -1053,6 +1057,8 @@ svcauth_gss_set_client(struct svc_rqst *rqstp) stat = svcauth_unix_set_client(rqstp); if (stat == SVC_DROP || stat == SVC_CLOSE) return stat; + + rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } @@ -1142,7 +1148,7 @@ static void gss_free_in_token_pages(struct gssp_in_token *in_token) } static int gss_read_proxy_verf(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc, __be32 *authp, + struct rpc_gss_wire_cred *gc, struct xdr_netobj *in_handle, struct gssp_in_token *in_token) { @@ -1151,7 +1157,7 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp, int pages, i, res, pgto, pgfrom; size_t inlen, to_offs, from_offs; - res = gss_read_common_verf(gc, argv, authp, in_handle); + res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle); if (res) return res; @@ -1227,7 +1233,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit, * Otherwise, drop the request pending an answer to the upcall. 
*/ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc, __be32 *authp) + struct rpc_gss_wire_cred *gc) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -1236,7 +1242,7 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp, struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); memset(&rsikey, 0, sizeof(rsikey)); - ret = gss_read_verf(gc, argv, authp, + ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat, &rsikey.in_handle, &rsikey.in_token); if (ret) return ret; @@ -1339,7 +1345,7 @@ out: } static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, - struct rpc_gss_wire_cred *gc, __be32 *authp) + struct rpc_gss_wire_cred *gc) { struct kvec *resv = &rqstp->rq_res.head[0]; struct xdr_netobj cli_handle; @@ -1351,8 +1357,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp, struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); memset(&ud, 0, sizeof(ud)); - ret = gss_read_proxy_verf(rqstp, gc, authp, - &ud.in_handle, &ud.in_token); + ret = gss_read_proxy_verf(rqstp, gc, &ud.in_handle, &ud.in_token); if (ret) return ret; @@ -1525,7 +1530,7 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net) {} * response here and return SVC_COMPLETE. */ static int -svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) +svcauth_gss_accept(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -1538,7 +1543,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) int ret; struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id); - *authp = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; if (!svcdata) svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); if (!svcdata) @@ -1575,22 +1580,22 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) goto auth_err; - *authp = rpc_autherr_badverf; + rqstp->rq_auth_stat = rpc_autherr_badverf; switch (gc->gc_proc) { case RPC_GSS_PROC_INIT: case RPC_GSS_PROC_CONTINUE_INIT: if (use_gss_proxy(SVC_NET(rqstp))) - return svcauth_gss_proxy_init(rqstp, gc, authp); + return svcauth_gss_proxy_init(rqstp, gc); else - return svcauth_gss_legacy_init(rqstp, gc, authp); + return svcauth_gss_legacy_init(rqstp, gc); case RPC_GSS_PROC_DATA: case RPC_GSS_PROC_DESTROY: /* Look up the context, and check the verifier: */ - *authp = rpcsec_gsserr_credproblem; + rqstp->rq_auth_stat = rpcsec_gsserr_credproblem; rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx); if (!rsci) goto auth_err; - switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { + switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) { case SVC_OK: break; case SVC_DENIED: @@ -1600,7 +1605,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) } break; default: - *authp = rpc_autherr_rejectedcred; + rqstp->rq_auth_stat = rpc_autherr_rejectedcred; goto auth_err; } @@ -1616,13 +1621,13 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp) svc_putnl(resv, RPC_SUCCESS); goto complete; case RPC_GSS_PROC_DATA: - *authp = rpcsec_gsserr_ctxproblem; + rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem; svcdata->verf_start = resv->iov_base + resv->iov_len; if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) goto auth_err; rqstp->rq_cred = rsci->cred; get_group_info(rsci->cred.cr_group_info); - *authp = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; switch (gc->gc_svc) { case RPC_GSS_SVC_NONE: break; diff --git 
a/net/sunrpc/cache.c b/net/sunrpc/cache.c index 1a2c1c44bb00..59641803472c 100644 --- a/net/sunrpc/cache.c +++ b/net/sunrpc/cache.c @@ -803,7 +803,7 @@ static int cache_request(struct cache_detail *detail, detail->cache_request(detail, crq->item, &bp, &len); if (len < 0) - return -EAGAIN; + return -E2BIG; return PAGE_SIZE - len; } diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8b4de70e8ead..f056ff931444 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -167,7 +167,7 @@ static int rpc_clnt_skip_event(struct rpc_clnt *clnt, unsigned long event) case RPC_PIPEFS_MOUNT: if (clnt->cl_pipedir_objects.pdh_dentry != NULL) return 1; - if (atomic_read(&clnt->cl_count) == 0) + if (refcount_read(&clnt->cl_count) == 0) return 1; break; case RPC_PIPEFS_UMOUNT: @@ -419,7 +419,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, clnt->cl_rtt = &clnt->cl_rtt_default; rpc_init_rtt(&clnt->cl_rtt_default, clnt->cl_timeout->to_initval); - atomic_set(&clnt->cl_count, 1); + refcount_set(&clnt->cl_count, 1); if (nodename == NULL) nodename = utsname()->nodename; @@ -431,7 +431,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, if (err) goto out_no_path; if (parent) - atomic_inc(&parent->cl_count); + refcount_inc(&parent->cl_count); trace_rpc_clnt_new(clnt, xprt, program->name, args->servername); return clnt; @@ -918,18 +918,16 @@ rpc_free_client(struct rpc_clnt *clnt) static struct rpc_clnt * rpc_free_auth(struct rpc_clnt *clnt) { - if (clnt->cl_auth == NULL) - return rpc_free_client(clnt); - /* * Note: RPCSEC_GSS may need to send NULL RPC calls in order to * release remaining GSS contexts. This mechanism ensures * that it can do so safely. */ - atomic_inc(&clnt->cl_count); - rpcauth_release(clnt->cl_auth); - clnt->cl_auth = NULL; - if (atomic_dec_and_test(&clnt->cl_count)) + if (clnt->cl_auth != NULL) { + rpcauth_release(clnt->cl_auth); + clnt->cl_auth = NULL; + } + if (refcount_dec_and_test(&clnt->cl_count)) return rpc_free_client(clnt); return NULL; } @@ -943,7 +941,7 @@ rpc_release_client(struct rpc_clnt *clnt) do { if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - if (!atomic_dec_and_test(&clnt->cl_count)) + if (refcount_dec_not_one(&clnt->cl_count)) break; clnt = rpc_free_auth(clnt); } while (clnt != NULL); @@ -1082,7 +1080,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_set_transport(task, clnt); task->tk_client = clnt; - atomic_inc(&clnt->cl_count); + refcount_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_softerr) @@ -2694,17 +2692,18 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; -static int rpc_ping(struct rpc_clnt *clnt) +static void +rpc_null_call_prepare(struct rpc_task *task, void *data) { - struct rpc_message msg = { - .rpc_proc = &rpcproc_null, - }; - int err; - err = rpc_call_sync(clnt, &msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN | - RPC_TASK_NULLCREDS); - return err; + task->tk_flags &= ~RPC_TASK_NO_RETRANS_TIMEOUT; + rpc_call_start(task); } +static const struct rpc_call_ops rpc_null_ops = { + .rpc_call_prepare = rpc_null_call_prepare, + .rpc_call_done = rpc_default_callback, +}; + static struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, struct rpc_xprt *xprt, struct rpc_cred *cred, int flags, @@ -2718,7 +2717,7 @@ struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, .rpc_xprt = xprt, .rpc_message = &msg, .rpc_op_cred = cred, - .callback_ops = (ops != NULL) 
? ops : &rpc_default_ops, + .callback_ops = ops ?: &rpc_null_ops, .callback_data = data, .flags = flags | RPC_TASK_SOFT | RPC_TASK_SOFTCONN | RPC_TASK_NULLCREDS, @@ -2733,6 +2732,19 @@ struct rpc_task *rpc_call_null(struct rpc_clnt *clnt, struct rpc_cred *cred, int } EXPORT_SYMBOL_GPL(rpc_call_null); +static int rpc_ping(struct rpc_clnt *clnt) +{ + struct rpc_task *task; + int status; + + task = rpc_call_null_helper(clnt, NULL, NULL, 0, NULL, NULL); + if (IS_ERR(task)) + return PTR_ERR(task); + status = task->tk_status; + rpc_put_task(task); + return status; +} + struct rpc_cb_add_xprt_calldata { struct rpc_xprt_switch *xps; struct rpc_xprt *xprt; @@ -2756,6 +2768,7 @@ static void rpc_cb_add_xprt_release(void *calldata) } static const struct rpc_call_ops rpc_cb_add_xprt_call_ops = { + .rpc_call_prepare = rpc_null_call_prepare, .rpc_call_done = rpc_cb_add_xprt_done, .rpc_release = rpc_cb_add_xprt_release, }; @@ -2774,6 +2787,15 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, struct rpc_cb_add_xprt_calldata *data; struct rpc_task *task; + if (xps->xps_nunique_destaddr_xprts + 1 > clnt->cl_max_connect) { + rcu_read_lock(); + pr_warn("SUNRPC: reached max allowed number (%d) did not add " + "transport to server: %s\n", clnt->cl_max_connect, + rpc_peeraddr2str(clnt, RPC_DISPLAY_ADDR)); + rcu_read_unlock(); + return -EINVAL; + } + data = kmalloc(sizeof(*data), GFP_NOFS); if (!data) return -ENOMEM; @@ -2786,7 +2808,7 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); - + data->xps->xps_nunique_destaddr_xprts++; rpc_put_task(task); success: return 1; diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 827bf3a28178..7dc9cc929bfd 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -90,7 +90,7 @@ static int tasks_open(struct inode *inode, struct file *filp) struct seq_file *seq = filp->private_data; struct rpc_clnt *clnt = seq->private = inode->i_private; - if (!atomic_inc_not_zero(&clnt->cl_count)) { + if (!refcount_inc_not_zero(&clnt->cl_count)) { seq_release(inode, filp); ret = -EINVAL; } diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 09c000d490a1..ee5336d73fdd 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -423,7 +423,7 @@ rpc_info_open(struct inode *inode, struct file *file) spin_lock(&file->f_path.dentry->d_lock); if (!d_unhashed(file->f_path.dentry)) clnt = RPC_I(inode)->private; - if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + if (clnt != NULL && refcount_inc_not_zero(&clnt->cl_count)) { spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index bfcbaf7b3822..a3bbe5ce4570 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1186,22 +1186,6 @@ void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) 
{} #endif -__be32 -svc_return_autherr(struct svc_rqst *rqstp, __be32 auth_err) -{ - set_bit(RQ_AUTHERR, &rqstp->rq_flags); - return auth_err; -} -EXPORT_SYMBOL_GPL(svc_return_autherr); - -static __be32 -svc_get_autherr(struct svc_rqst *rqstp, __be32 *statp) -{ - if (test_and_clear_bit(RQ_AUTHERR, &rqstp->rq_flags)) - return *statp; - return rpc_auth_ok; -} - static int svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) { @@ -1225,7 +1209,7 @@ svc_generic_dispatch(struct svc_rqst *rqstp, __be32 *statp) test_bit(RQ_DROPME, &rqstp->rq_flags)) return 0; - if (test_bit(RQ_AUTHERR, &rqstp->rq_flags)) + if (rqstp->rq_auth_stat != rpc_auth_ok) return 1; if (*statp != rpc_success) @@ -1306,7 +1290,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) struct svc_process_info process; __be32 *statp; u32 prog, vers; - __be32 auth_stat, rpc_stat; + __be32 rpc_stat; int auth_res; __be32 *reply_statp; @@ -1349,14 +1333,12 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) * We do this before anything else in order to get a decent * auth verifier. */ - auth_res = svc_authenticate(rqstp, &auth_stat); + auth_res = svc_authenticate(rqstp); /* Also give the program a chance to reject this call: */ - if (auth_res == SVC_OK && progp) { - auth_stat = rpc_autherr_badcred; + if (auth_res == SVC_OK && progp) auth_res = progp->pg_authenticate(rqstp); - } if (auth_res != SVC_OK) - trace_svc_authenticate(rqstp, auth_res, auth_stat); + trace_svc_authenticate(rqstp, auth_res); switch (auth_res) { case SVC_OK: break; @@ -1415,15 +1397,15 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv) goto release_dropit; if (*statp == rpc_garbage_args) goto err_garbage; - auth_stat = svc_get_autherr(rqstp, statp); - if (auth_stat != rpc_auth_ok) - goto err_release_bad_auth; } else { dprintk("svc: calling dispatcher\n"); if (!process.dispatch(rqstp, statp)) goto release_dropit; /* Release reply info */ } + if (rqstp->rq_auth_stat != rpc_auth_ok) + goto err_release_bad_auth; + /* Check RPC status result */ if (*statp != rpc_success) resv->iov_len = ((void*)statp) - resv->iov_base + 4; @@ -1473,13 +1455,14 @@ err_release_bad_auth: if (procp->pc_release) procp->pc_release(rqstp); err_bad_auth: - dprintk("svc: authentication failed (%d)\n", ntohl(auth_stat)); + dprintk("svc: authentication failed (%d)\n", + be32_to_cpu(rqstp->rq_auth_stat)); serv->sv_stats->rpcbadauth++; /* Restore write pointer to location of accept status: */ xdr_ressize_check(rqstp, reply_statp); svc_putnl(resv, 1); /* REJECT */ svc_putnl(resv, 1); /* AUTH_ERROR */ - svc_putnl(resv, ntohl(auth_stat)); /* status */ + svc_putu32(resv, rqstp->rq_auth_stat); /* status */ goto sendit; err_bad_prog: diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index e1153cba9cc6..6316bd2b8f37 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -663,7 +663,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) { struct svc_serv *serv = rqstp->rq_server; struct xdr_buf *arg = &rqstp->rq_arg; - unsigned long pages, filled; + unsigned long pages, filled, ret; pagevec_init(&rqstp->rq_pvec); @@ -675,11 +675,12 @@ static int svc_alloc_arg(struct svc_rqst *rqstp) pages = RPCSVC_MAXPAGES; } - for (;;) { - filled = alloc_pages_bulk_array(GFP_KERNEL, pages, - rqstp->rq_pages); - if (filled == pages) - break; + for (filled = 0; filled < pages; filled = ret) { + ret = alloc_pages_bulk_array(GFP_KERNEL, pages, + rqstp->rq_pages); + if (ret > filled) + /* Made progress, 
don't sleep yet */ + continue; set_current_state(TASK_INTERRUPTIBLE); if (signalled() || kthread_should_stop()) { diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c index 998b196b6176..5a8b8e03fdd4 100644 --- a/net/sunrpc/svcauth.c +++ b/net/sunrpc/svcauth.c @@ -59,12 +59,12 @@ svc_put_auth_ops(struct auth_ops *aops) } int -svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) +svc_authenticate(struct svc_rqst *rqstp) { rpc_authflavor_t flavor; struct auth_ops *aops; - *authp = rpc_auth_ok; + rqstp->rq_auth_stat = rpc_auth_ok; flavor = svc_getnl(&rqstp->rq_arg.head[0]); @@ -72,7 +72,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) aops = svc_get_auth_ops(flavor); if (aops == NULL) { - *authp = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } @@ -80,7 +80,7 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp) init_svc_cred(&rqstp->rq_cred); rqstp->rq_authop = aops; - return aops->accept(rqstp, authp); + return aops->accept(rqstp); } EXPORT_SYMBOL_GPL(svc_authenticate); diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c index 35b7966ac3b3..d7ed7d49115a 100644 --- a/net/sunrpc/svcauth_unix.c +++ b/net/sunrpc/svcauth_unix.c @@ -681,8 +681,9 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) rqstp->rq_client = NULL; if (rqstp->rq_proc == 0) - return SVC_OK; + goto out; + rqstp->rq_auth_stat = rpc_autherr_badcred; ipm = ip_map_cached_get(xprt); if (ipm == NULL) ipm = __ip_map_lookup(sn->ip_map_cache, rqstp->rq_server->sv_program->pg_class, @@ -719,13 +720,16 @@ svcauth_unix_set_client(struct svc_rqst *rqstp) put_group_info(cred->cr_group_info); cred->cr_group_info = gi; } + +out: + rqstp->rq_auth_stat = rpc_auth_ok; return SVC_OK; } EXPORT_SYMBOL_GPL(svcauth_unix_set_client); static int -svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) +svcauth_null_accept(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -736,12 +740,12 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp) if (svc_getu32(argv) != 0) { dprintk("svc: bad null cred\n"); - *authp = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { dprintk("svc: bad null verf\n"); - *authp = rpc_autherr_badverf; + rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } @@ -785,7 +789,7 @@ struct auth_ops svcauth_null = { static int -svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) +svcauth_unix_accept(struct svc_rqst *rqstp) { struct kvec *argv = &rqstp->rq_arg.head[0]; struct kvec *resv = &rqstp->rq_res.head[0]; @@ -827,7 +831,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) } groups_sort(cred->cr_group_info); if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) { - *authp = rpc_autherr_badverf; + rqstp->rq_auth_stat = rpc_autherr_badverf; return SVC_DENIED; } @@ -839,7 +843,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp) return SVC_OK; badcred: - *authp = rpc_autherr_badcred; + rqstp->rq_auth_stat = rpc_autherr_badcred; return SVC_DENIED; } diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c index 64da3bfd28e6..9a6f17e18f73 100644 --- a/net/sunrpc/sysfs.c +++ b/net/sunrpc/sysfs.c @@ -100,6 +100,28 @@ static ssize_t rpc_sysfs_xprt_dstaddr_show(struct kobject *kobj, return ret + 1; } +static ssize_t rpc_sysfs_xprt_srcaddr_show(struct kobject *kobj, + struct kobj_attribute *attr, + char *buf) +{ + struct rpc_xprt 
*xprt = rpc_sysfs_xprt_kobj_get_xprt(kobj); + struct sockaddr_storage saddr; + struct sock_xprt *sock; + ssize_t ret = -1; + + if (!xprt) + return 0; + + sock = container_of(xprt, struct sock_xprt, xprt); + if (kernel_getsockname(sock->sock, (struct sockaddr *)&saddr) < 0) + goto out; + + ret = sprintf(buf, "%pISc\n", &saddr); +out: + xprt_put(xprt); + return ret + 1; +} + static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) @@ -114,14 +136,16 @@ static ssize_t rpc_sysfs_xprt_info_show(struct kobject *kobj, "max_num_slots=%u\nmin_num_slots=%u\nnum_reqs=%u\n" "binding_q_len=%u\nsending_q_len=%u\npending_q_len=%u\n" "backlog_q_len=%u\nmain_xprt=%d\nsrc_port=%u\n" - "tasks_queuelen=%ld\n", + "tasks_queuelen=%ld\ndst_port=%s\n", xprt->last_used, xprt->cong, xprt->cwnd, xprt->max_reqs, xprt->min_reqs, xprt->num_reqs, xprt->binding.qlen, xprt->sending.qlen, xprt->pending.qlen, xprt->backlog.qlen, xprt->main, (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? get_srcport(xprt) : 0, - atomic_long_read(&xprt->queuelen)); + atomic_long_read(&xprt->queuelen), + (xprt->xprt_class->ident == XPRT_TRANSPORT_TCP) ? + xprt->address_strings[RPC_DISPLAY_PORT] : "0"); xprt_put(xprt); return ret + 1; } @@ -183,8 +207,10 @@ static ssize_t rpc_sysfs_xprt_switch_info_show(struct kobject *kobj, if (!xprt_switch) return 0; - ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\nqueue_len=%ld\n", + ret = sprintf(buf, "num_xprts=%u\nnum_active=%u\n" + "num_unique_destaddr=%u\nqueue_len=%ld\n", xprt_switch->xps_nxprts, xprt_switch->xps_nactive, + xprt_switch->xps_nunique_destaddr_xprts, atomic_long_read(&xprt_switch->xps_queuelen)); xprt_switch_put(xprt_switch); return ret + 1; @@ -376,6 +402,9 @@ static const void *rpc_sysfs_xprt_namespace(struct kobject *kobj) static struct kobj_attribute rpc_sysfs_xprt_dstaddr = __ATTR(dstaddr, 0644, rpc_sysfs_xprt_dstaddr_show, rpc_sysfs_xprt_dstaddr_store); +static struct kobj_attribute rpc_sysfs_xprt_srcaddr = __ATTR(srcaddr, + 0644, rpc_sysfs_xprt_srcaddr_show, NULL); + static struct kobj_attribute rpc_sysfs_xprt_info = __ATTR(xprt_info, 0444, rpc_sysfs_xprt_info_show, NULL); @@ -384,6 +413,7 @@ static struct kobj_attribute rpc_sysfs_xprt_change_state = __ATTR(xprt_state, static struct attribute *rpc_sysfs_xprt_attrs[] = { &rpc_sysfs_xprt_dstaddr.attr, + &rpc_sysfs_xprt_srcaddr.attr, &rpc_sysfs_xprt_info.attr, &rpc_sysfs_xprt_change_state.attr, NULL, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 05abe344a269..cfd681700d1a 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -762,6 +762,20 @@ void xprt_disconnect_done(struct rpc_xprt *xprt) EXPORT_SYMBOL_GPL(xprt_disconnect_done); /** + * xprt_schedule_autoclose_locked - Try to schedule an autoclose RPC call + * @xprt: transport to disconnect + */ +static void xprt_schedule_autoclose_locked(struct rpc_xprt *xprt) +{ + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) + queue_work(xprtiod_workqueue, &xprt->task_cleanup); + else if (xprt->snd_task && !test_bit(XPRT_SND_IS_COOKIE, &xprt->state)) + rpc_wake_up_queued_task_set_status(&xprt->pending, + xprt->snd_task, -ENOTCONN); +} + +/** * xprt_force_disconnect - force a transport to disconnect * @xprt: transport to disconnect * @@ -772,13 +786,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt) /* Don't race with the test_bit() in xprt_clear_locked() */ spin_lock(&xprt->transport_lock); - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - 
if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - else if (xprt->snd_task) - rpc_wake_up_queued_task_set_status(&xprt->pending, - xprt->snd_task, -ENOTCONN); + xprt_schedule_autoclose_locked(xprt); spin_unlock(&xprt->transport_lock); } EXPORT_SYMBOL_GPL(xprt_force_disconnect); @@ -818,11 +826,7 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie) goto out; if (test_bit(XPRT_CLOSING, &xprt->state)) goto out; - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - /* Try to schedule an autoclose RPC call */ - if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0) - queue_work(xprtiod_workqueue, &xprt->task_cleanup); - xprt_wake_pending_tasks(xprt, -EAGAIN); + xprt_schedule_autoclose_locked(xprt); out: spin_unlock(&xprt->transport_lock); } @@ -880,12 +884,14 @@ bool xprt_lock_connect(struct rpc_xprt *xprt, goto out; if (xprt->snd_task != task) goto out; + set_bit(XPRT_SND_IS_COOKIE, &xprt->state); xprt->snd_task = cookie; ret = true; out: spin_unlock(&xprt->transport_lock); return ret; } +EXPORT_SYMBOL_GPL(xprt_lock_connect); void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) { @@ -895,12 +901,14 @@ void xprt_unlock_connect(struct rpc_xprt *xprt, void *cookie) if (!test_bit(XPRT_LOCKED, &xprt->state)) goto out; xprt->snd_task =NULL; + clear_bit(XPRT_SND_IS_COOKIE, &xprt->state); xprt->ops->release_xprt(xprt, NULL); xprt_schedule_autodisconnect(xprt); out: spin_unlock(&xprt->transport_lock); wake_up_bit(&xprt->state, XPRT_LOCKED); } +EXPORT_SYMBOL_GPL(xprt_unlock_connect); /** * xprt_connect - schedule a transport connect operation diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index c60820e45082..1693f81aae37 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -139,6 +139,7 @@ struct rpc_xprt_switch *xprt_switch_alloc(struct rpc_xprt *xprt, xps->xps_iter_ops = &rpc_xprt_iter_singular; rpc_sysfs_xprt_switch_setup(xps, xprt, gfp_flags); xprt_switch_add_xprt_locked(xps, xprt); + xps->xps_nunique_destaddr_xprts = 1; rpc_sysfs_xprt_setup(xps, xprt, gfp_flags); } diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 1151efd09b27..17f174d6ea3b 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (rc < 0) goto failed_marshal; - if (rpcrdma_post_sends(r_xprt, req)) + if (frwr_send(r_xprt, req)) goto drop_connection; return 0; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 229fcc9a9064..f700b34a5bfd 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -394,6 +394,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct rpcrdma_ep *ep = r_xprt->rx_ep; struct rpcrdma_mr *mr; unsigned int num_wrs; + int ret; num_wrs = 1; post_wr = send_wr; @@ -420,7 +421,10 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) } trace_xprtrdma_post_send(req); - return ib_post_send(ep->re_id->qp, post_wr, NULL); + ret = ib_post_send(ep->re_id->qp, post_wr, NULL); + if (ret) + trace_xprtrdma_post_send_err(r_xprt, req, ret); + return ret; } /** @@ -557,6 +561,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* On error, the MRs get destroyed once the QP has drained. */ trace_xprtrdma_post_linv_err(req, rc); + + /* Force a connection loss to ensure complete recovery. 
+ */ + rpcrdma_force_disconnect(ep); } /** @@ -653,4 +661,8 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * retransmission. */ rpcrdma_unpin_rqst(req->rl_reply); + + /* Force a connection loss to ensure complete recovery. + */ + rpcrdma_force_disconnect(ep); } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 9c2ffc67c0fd..16e5696314a4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -250,12 +250,9 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt->stat.connect_start; xprt_set_connected(xprt); rc = -EAGAIN; - } else { - /* Force a call to xprt_rdma_close to clean up */ - spin_lock(&xprt->transport_lock); - set_bit(XPRT_CLOSE_WAIT, &xprt->state); - spin_unlock(&xprt->transport_lock); - } + } else + rpcrdma_xprt_disconnect(r_xprt); + xprt_unlock_connect(xprt, r_xprt); xprt_wake_pending_tasks(xprt, rc); } @@ -489,6 +486,8 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) struct rpcrdma_ep *ep = r_xprt->rx_ep; unsigned long delay; + WARN_ON_ONCE(!xprt_lock_connect(xprt, task, r_xprt)); + delay = 0; if (ep && ep->re_connect_status != 0) { delay = xprt_reconnect_delay(xprt); @@ -661,7 +660,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) goto drop_connection; rqst->rq_xtime = ktime_get(); - if (rpcrdma_post_sends(r_xprt, req)) + if (frwr_send(r_xprt, req)) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 649c23518ec0..aaec3c9be8db 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -124,7 +124,7 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) * connection is closed or lost. (The important thing is it needs * to be invoked "at least" once). */ -static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) +void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) { if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) xprt_force_disconnect(ep->re_xprt); @@ -1350,21 +1350,6 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) } /** - * rpcrdma_post_sends - Post WRs to a transport's Send Queue - * @r_xprt: controlling transport instance - * @req: rpcrdma_req containing the Send WR to post - * - * Returns 0 if the post was successful, otherwise -ENOTCONN - * is returned. 
- */ -int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) -{ - if (frwr_send(r_xprt, req)) - return -ENOTCONN; - return 0; -} - -/** * rpcrdma_post_recvs - Refill the Receive Queue * @r_xprt: controlling transport instance * @needed: current credit grant @@ -1416,12 +1401,8 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) rc = ib_post_recv(ep->re_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); - if (atomic_dec_return(&ep->re_receiving) > 0) - complete(&ep->re_done); - -out: - trace_xprtrdma_post_recvs(r_xprt, count, rc); if (rc) { + trace_xprtrdma_post_recvs_err(r_xprt, rc); for (wr = bad_wr; wr;) { struct rpcrdma_rep *rep; @@ -1431,6 +1412,11 @@ out: --count; } } + if (atomic_dec_return(&ep->re_receiving) > 0) + complete(&ep->re_done); + +out: + trace_xprtrdma_post_recvs(r_xprt, count); ep->re_receive_count += count; return; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5d231d94e944..d91f54eae00b 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -454,11 +454,11 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Endpoint calls - xprtrdma/verbs.c */ +void rpcrdma_force_disconnect(struct rpcrdma_ep *ep); void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); -int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp); /* diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b7dbdcbdeb6c..04f1b78bcbca 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1656,7 +1656,7 @@ static int xs_get_srcport(struct sock_xprt *transport) unsigned short get_srcport(struct rpc_xprt *xprt) { struct sock_xprt *sock = container_of(xprt, struct sock_xprt, xprt); - return sock->srcport; + return xs_sock_getport(sock->sock); } EXPORT_SYMBOL(get_srcport); @@ -2099,13 +2099,20 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt) if (sock == NULL) return; + if (!xprt->reuseport) { + xs_close(xprt); + return; + } switch (skst) { - default: + case TCP_FIN_WAIT1: + case TCP_FIN_WAIT2: + break; + case TCP_ESTABLISHED: + case TCP_CLOSE_WAIT: kernel_sock_shutdown(sock, SHUT_RDWR); trace_rpc_socket_shutdown(xprt, sock); break; - case TCP_CLOSE: - case TCP_TIME_WAIT: + default: xs_reset_transport(transport); } } diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e3105ba407c7..ad570c2450be 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1426,7 +1426,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (ua) { if (!tipc_uaddr_valid(ua, m->msg_namelen)) return -EINVAL; - atype = ua->addrtype; + atype = ua->addrtype; } /* If socket belongs to a communication group follow other paths */ @@ -2423,7 +2423,7 @@ static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, u32 dport, struct sk_buff_head *xmitq) { - unsigned long time_limit = jiffies + 2; + unsigned long time_limit = jiffies + usecs_to_jiffies(20000); struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index eb47b9de2380..92345c9bb60c 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -3073,7 +3073,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock, other = 
unix_peer(sk); if (other && unix_peer(other) != sk && - unix_recvq_full(other) && + unix_recvq_full_lockless(other) && unix_dgram_peer_wake_me(sk, other)) writable = 0; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 3e02cc3b24f8..e2c0cfb334d2 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -2014,7 +2014,7 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, { const struct vsock_transport *transport; struct vsock_sock *vsk; - ssize_t record_len; + ssize_t msg_len; long timeout; int err = 0; DEFINE_WAIT(wait); @@ -2028,9 +2028,9 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, if (err <= 0) goto out; - record_len = transport->seqpacket_dequeue(vsk, msg, flags); + msg_len = transport->seqpacket_dequeue(vsk, msg, flags); - if (record_len < 0) { + if (msg_len < 0) { err = -ENOMEM; goto out; } @@ -2044,14 +2044,14 @@ static int __vsock_seqpacket_recvmsg(struct sock *sk, struct msghdr *msg, * packet. */ if (flags & MSG_TRUNC) - err = record_len; + err = msg_len; else err = len - msg_data_left(msg); /* Always set MSG_TRUNC if real length of packet is * bigger than user's buffer. */ - if (record_len > len) + if (msg_len > len) msg->msg_flags |= MSG_TRUNC; } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 081e7ae93cb1..59ee1be5a6dd 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -76,8 +76,12 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info, goto out; if (msg_data_left(info->msg) == 0 && - info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) - pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) { + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM); + + if (info->msg->msg_flags & MSG_EOR) + pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR); + } } trace_virtio_transport_alloc_pkt(src_cid, src_port, @@ -457,9 +461,12 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk, dequeued_len += pkt_len; } - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) { + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) { msg_ready = true; vvs->msg_count--; + + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) + msg->msg_flags |= MSG_EOR; } virtio_transport_dec_rx_pkt(vvs, pkt); @@ -1029,7 +1036,7 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, goto out; } - if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) + if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) vvs->msg_count++; /* Try to copy small packets into the buffer of last packet queued, @@ -1044,12 +1051,12 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk, /* If there is space in the last packet queued, we copy the * new packet in its buffer. We avoid this if the last packet - * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is - * delimiter of SEQPACKET record, so 'pkt' is the first packet - * of a new record. + * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is + * delimiter of SEQPACKET message, so 'pkt' is the first packet + * of a new message. */ if ((pkt->len <= last_pkt->buf_len - last_pkt->len) && - !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) { + !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) { memcpy(last_pkt->buf + last_pkt->len, pkt->buf, pkt->len); last_pkt->len += pkt->len; |
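
Note (not part of the patch): the virtio_transport change above splits the old VIRTIO_VSOCK_SEQ_EOR flag into VIRTIO_VSOCK_SEQ_EOM (message delimiter) and VIRTIO_VSOCK_SEQ_EOR (sender passed MSG_EOR), so a SEQPACKET receiver can now observe the sender's explicit MSG_EOR in msg_flags. A minimal user-space sketch of how that could be observed follows; the CID/port values and the overall setup are illustrative assumptions, not taken from the patch.

/* Illustrative AF_VSOCK SOCK_SEQPACKET receiver: prints whether the
 * peer's sendmsg() used MSG_EOR. Port 1234 is a placeholder.
 */
#include <stdio.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <unistd.h>
#include <linux/vm_sockets.h>

int main(void)
{
	struct sockaddr_vm addr = {
		.svm_family = AF_VSOCK,
		.svm_cid = VMADDR_CID_ANY,	/* accept from any CID */
		.svm_port = 1234,		/* placeholder port */
	};
	char buf[4096];
	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
	int fd, conn;
	ssize_t n;

	fd = socket(AF_VSOCK, SOCK_SEQPACKET, 0);
	if (fd < 0 ||
	    bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
	    listen(fd, 1) < 0)
		return 1;

	conn = accept(fd, NULL, NULL);
	if (conn < 0)
		return 1;

	n = recvmsg(conn, &msg, 0);
	if (n >= 0)
		/* EOM alone delimits the message; MSG_EOR shows up in
		 * msg_flags only when the sender set it explicitly.
		 */
		printf("received %zd bytes, MSG_EOR=%d\n",
		       n, !!(msg.msg_flags & MSG_EOR));

	close(conn);
	close(fd);
	return 0;
}

Before this change the receive path did not report MSG_EOR at all; with the EOM/EOR split, message framing and the sender's record marker are carried independently on the wire, as the hunks above show.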