summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/9p/client.c20
-rw-r--r--net/atm/common.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/batman-adv/fragmentation.c20
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bluetooth/af_bluetooth.c2
-rw-r--r--net/bluetooth/cmtp/capi.c2
-rw-r--r--net/bluetooth/hci_request.c2
-rw-r--r--net/bluetooth/l2cap_sock.c1
-rw-r--r--net/bluetooth/rfcomm/sock.c1
-rw-r--r--net/bluetooth/sco.c1
-rw-r--r--net/bridge/br_forward.c3
-rw-r--r--net/bridge/br_sysfs_br.c1
-rw-r--r--net/bridge/br_sysfs_if.c1
-rw-r--r--net/bridge/br_vlan.c2
-rw-r--r--net/caif/caif_socket.c2
-rw-r--r--net/ceph/cls_lock_client.c14
-rw-r--r--net/ceph/crush/crush.c5
-rw-r--r--net/ceph/crush/mapper.c227
-rw-r--r--net/ceph/crypto.c2
-rw-r--r--net/ceph/messenger.c44
-rw-r--r--net/ceph/osd_client.c130
-rw-r--r--net/ceph/osdmap.c101
-rw-r--r--net/ceph/snapshot.c2
-rw-r--r--net/core/dev.c111
-rw-r--r--net/core/ethtool.c2
-rw-r--r--net/core/net-sysfs.c1
-rw-r--r--net/core/net_namespace.c2
-rw-r--r--net/core/netclassid_cgroup.c2
-rw-r--r--net/core/netprio_cgroup.c2
-rw-r--r--net/core/scm.c1
-rw-r--r--net/core/sock.c16
-rw-r--r--net/core/stream.c1
-rw-r--r--net/dccp/input.c10
-rw-r--r--net/dccp/minisocks.c5
-rw-r--r--net/dccp/output.c1
-rw-r--r--net/decnet/af_decnet.c2
-rw-r--r--net/dns_resolver/dns_query.c6
-rw-r--r--net/ipv4/devinet.c1
-rw-r--r--net/ipv4/fib_frontend.c1
-rw-r--r--net/ipv4/netfilter.c7
-rw-r--r--net/ipv4/tcp.c15
-rw-r--r--net/ipv4/tcp_cdg.c2
-rw-r--r--net/ipv4/tcp_input.c10
-rw-r--r--net/ipv6/addrconf.c23
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c1
-rw-r--r--net/ipv6/route.c21
-rw-r--r--net/irda/af_irda.c1
-rw-r--r--net/irda/ircomm/ircomm_tty.c2
-rw-r--r--net/irda/irnet/irnet_ppp.c3
-rw-r--r--net/iucv/af_iucv.c2
-rw-r--r--net/kcm/kcmsock.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/mac80211/agg-rx.c3
-rw-r--r--net/mac80211/ieee80211_i.h2
-rw-r--r--net/mac80211/mesh_plink.c2
-rw-r--r--net/mac80211/pm.c1
-rw-r--r--net/mac80211/rx.c31
-rw-r--r--net/mac80211/sta_info.c4
-rw-r--r--net/mac80211/sta_info.h8
-rw-r--r--net/mac80211/status.c7
-rw-r--r--net/mac802154/llsec.c2
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_tables_api.c133
-rw-r--r--net/netfilter/nft_set_rbtree.c9
-rw-r--r--net/netfilter/xt_owner.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_sock.c1
-rw-r--r--net/openvswitch/actions.c3
-rw-r--r--net/openvswitch/conntrack.c1
-rw-r--r--net/packet/af_packet.c8
-rw-r--r--net/phonet/pep.c1
-rw-r--r--net/phonet/socket.c2
-rw-r--r--net/rds/ib.c10
-rw-r--r--net/rds/ib_mr.h2
-rw-r--r--net/rds/page.c29
-rw-r--r--net/rds/rds.h9
-rw-r--r--net/rds/tcp.c6
-rw-r--r--net/rds/transport.c4
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/af_rxrpc.c12
-rw-r--r--net/rxrpc/ar-internal.h1
-rw-r--r--net/rxrpc/call_accept.c48
-rw-r--r--net/rxrpc/call_object.c18
-rw-r--r--net/rxrpc/conn_client.c2
-rw-r--r--net/rxrpc/input.c1
-rw-r--r--net/rxrpc/recvmsg.c41
-rw-r--r--net/rxrpc/sendmsg.c60
-rw-r--r--net/sched/em_meta.c1
-rw-r--r--net/sctp/input.c3
-rw-r--r--net/sctp/socket.c1
-rw-r--r--net/smc/af_smc.c2
-rw-r--r--net/smc/smc_clc.c2
-rw-r--r--net/smc/smc_close.c2
-rw-r--r--net/smc/smc_rx.c2
-rw-r--r--net/smc/smc_tx.c2
-rw-r--r--net/strparser/strparser.c1
-rw-r--r--net/sunrpc/auth.c16
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c4
-rw-r--r--net/sunrpc/auth_null.c3
-rw-r--r--net/sunrpc/auth_unix.c18
-rw-r--r--net/sunrpc/cache.c121
-rw-r--r--net/sunrpc/clnt.c51
-rw-r--r--net/sunrpc/debugfs.c35
-rw-r--r--net/sunrpc/svc.c26
-rw-r--r--net/sunrpc/svcauth_unix.c4
-rw-r--r--net/sunrpc/svcsock.c1
-rw-r--r--net/sunrpc/xdr.c34
-rw-r--r--net/sunrpc/xprt.c2
-rw-r--r--net/sunrpc/xprtrdma/fmr_ops.c5
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c11
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c82
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c17
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_marshal.c299
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_recvfrom.c20
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c22
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c69
-rw-r--r--net/sunrpc/xprtrdma/transport.c6
-rw-r--r--net/sunrpc/xprtrdma/verbs.c96
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h30
-rw-r--r--net/sunrpc/xprtsock.c94
-rw-r--r--net/tipc/socket.c2
-rw-r--r--net/unix/af_unix.c2
-rw-r--r--net/vmw_vsock/af_vsock.c1
-rw-r--r--net/vmw_vsock/virtio_transport.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c1
-rw-r--r--net/x25/af_x25.c2
128 files changed, 1390 insertions, 1018 deletions
diff --git a/net/9p/client.c b/net/9p/client.c
index 3fc94a49ccd5..3ce672af1596 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -32,7 +32,7 @@
#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
#include <net/9p/9p.h>
@@ -1101,7 +1101,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt)
EXPORT_SYMBOL(p9_client_begin_disconnect);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
- char *uname, kuid_t n_uname, char *aname)
+ const char *uname, kuid_t n_uname, const char *aname)
{
int err = 0;
struct p9_req_t *req;
@@ -1149,7 +1149,7 @@ error:
EXPORT_SYMBOL(p9_client_attach);
struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
- char **wnames, int clone)
+ const unsigned char * const *wnames, int clone)
{
int err;
struct p9_client *clnt;
@@ -1271,7 +1271,7 @@ error:
}
EXPORT_SYMBOL(p9_client_open);
-int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
+int p9_client_create_dotl(struct p9_fid *ofid, const char *name, u32 flags, u32 mode,
kgid_t gid, struct p9_qid *qid)
{
int err = 0;
@@ -1316,7 +1316,7 @@ error:
}
EXPORT_SYMBOL(p9_client_create_dotl);
-int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
+int p9_client_fcreate(struct p9_fid *fid, const char *name, u32 perm, int mode,
char *extension)
{
int err;
@@ -1361,8 +1361,8 @@ error:
}
EXPORT_SYMBOL(p9_client_fcreate);
-int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid,
- struct p9_qid *qid)
+int p9_client_symlink(struct p9_fid *dfid, const char *name,
+ const char *symtgt, kgid_t gid, struct p9_qid *qid)
{
int err = 0;
struct p9_client *clnt;
@@ -1395,7 +1395,7 @@ error:
}
EXPORT_SYMBOL(p9_client_symlink);
-int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)
+int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, const char *newname)
{
struct p9_client *clnt;
struct p9_req_t *req;
@@ -2117,7 +2117,7 @@ error:
}
EXPORT_SYMBOL(p9_client_readdir);
-int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
+int p9_client_mknod_dotl(struct p9_fid *fid, const char *name, int mode,
dev_t rdev, kgid_t gid, struct p9_qid *qid)
{
int err;
@@ -2148,7 +2148,7 @@ error:
}
EXPORT_SYMBOL(p9_client_mknod_dotl);
-int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
+int p9_client_mkdir_dotl(struct p9_fid *fid, const char *name, int mode,
kgid_t gid, struct p9_qid *qid)
{
int err;
diff --git a/net/atm/common.c b/net/atm/common.c
index a3ca922d307b..9613381f5db0 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -13,7 +13,7 @@
#include <linux/errno.h> /* error codes */
#include <linux/capability.h>
#include <linux/mm.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/time.h> /* struct timeval */
#include <linux/skbuff.h>
#include <linux/bitops.h>
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 878563a8354d..db9794ec61d8 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -10,7 +10,7 @@
#include <linux/kernel.h> /* printk */
#include <linux/skbuff.h>
#include <linux/wait.h>
-#include <linux/sched.h> /* jiffies and HZ */
+#include <linux/sched/signal.h>
#include <linux/fcntl.h> /* O_NONBLOCK */
#include <linux/init.h>
#include <linux/atm.h> /* ATM stuff */
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 90fcf5fc2e0a..a8e42cedf1db 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -20,7 +20,7 @@
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
diff --git a/net/batman-adv/fragmentation.c b/net/batman-adv/fragmentation.c
index ead18ca836de..11a23fd6e1a0 100644
--- a/net/batman-adv/fragmentation.c
+++ b/net/batman-adv/fragmentation.c
@@ -239,8 +239,10 @@ err_unlock:
spin_unlock_bh(&chain->lock);
err:
- if (!ret)
+ if (!ret) {
kfree(frag_entry_new);
+ kfree_skb(skb);
+ }
return ret;
}
@@ -313,7 +315,7 @@ free:
*
* There are three possible outcomes: 1) Packet is merged: Return true and
* set *skb to merged packet; 2) Packet is buffered: Return true and set *skb
- * to NULL; 3) Error: Return false and leave skb as is.
+ * to NULL; 3) Error: Return false and free skb.
*
* Return: true when packet is merged or buffered, false when skb is not not
* used.
@@ -338,9 +340,9 @@ bool batadv_frag_skb_buffer(struct sk_buff **skb,
goto out_err;
out:
- *skb = skb_out;
ret = true;
out_err:
+ *skb = skb_out;
return ret;
}
@@ -499,6 +501,12 @@ int batadv_frag_send_packet(struct sk_buff *skb,
/* Eat and send fragments from the tail of skb */
while (skb->len > max_fragment_size) {
+ /* The initial check in this function should cover this case */
+ if (unlikely(frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1)) {
+ ret = -EINVAL;
+ goto put_primary_if;
+ }
+
skb_fragment = batadv_frag_create(skb, &frag_header, mtu);
if (!skb_fragment) {
ret = -ENOMEM;
@@ -515,12 +523,6 @@ int batadv_frag_send_packet(struct sk_buff *skb,
}
frag_header.no++;
-
- /* The initial check in this function should cover this case */
- if (frag_header.no == BATADV_FRAG_MAX_FRAGMENTS - 1) {
- ret = -EINVAL;
- goto put_primary_if;
- }
}
/* Make room for the fragment header. */
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 8f64a5c01345..66b25e410a41 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -402,7 +402,7 @@ struct batadv_gw_node {
struct rcu_head rcu;
};
-DECLARE_EWMA(throughput, 1024, 8)
+DECLARE_EWMA(throughput, 10, 8)
/**
* struct batadv_hardif_neigh_node_bat_v - B.A.T.M.A.N. V private neighbor
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index cfb2faba46de..69e1f7d362a8 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -27,6 +27,8 @@
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/stringify.h>
+#include <linux/sched/signal.h>
+
#include <asm/ioctls.h>
#include <net/bluetooth/bluetooth.h>
diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c
index 46ac686c8911..bb308224099c 100644
--- a/net/bluetooth/cmtp/capi.c
+++ b/net/bluetooth/cmtp/capi.c
@@ -26,7 +26,7 @@
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/poll.h>
#include <linux/fcntl.h>
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 1015d9c8d97d..b5faff458d8b 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -21,6 +21,8 @@
SOFTWARE IS DISCLAIMED.
*/
+#include <linux/sched/signal.h>
+
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
#include <net/bluetooth/mgmt.h>
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index a8ba752732c9..f307b145ea54 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -29,6 +29,7 @@
#include <linux/module.h>
#include <linux/export.h>
+#include <linux/sched/signal.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 7511df72347f..aa1a814ceddc 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -27,6 +27,7 @@
#include <linux/export.h>
#include <linux/debugfs.h>
+#include <linux/sched/signal.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 3125ce670c2f..e4e9a2da1e7e 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -27,6 +27,7 @@
#include <linux/module.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
+#include <linux/sched/signal.h>
#include <net/bluetooth/bluetooth.h>
#include <net/bluetooth/hci_core.h>
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 6bfac29318f2..902af6ba481c 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -186,8 +186,9 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb,
/* Do not flood unicast traffic to ports that turn it off */
if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD))
continue;
+ /* Do not flood if mc off, except for traffic we originate */
if (pkt_type == BR_PKT_MULTICAST &&
- !(p->flags & BR_MCAST_FLOOD))
+ !(p->flags & BR_MCAST_FLOOD) && skb->dev != br->dev)
continue;
/* Do not flood to ports that enable proxy ARP */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 0f4034934d56..0b5dd607444c 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -19,6 +19,7 @@
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
#include <linux/times.h>
+#include <linux/sched/signal.h>
#include "br_private.h"
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 05e8946ccc03..79aee759aba5 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -17,6 +17,7 @@
#include <linux/if_bridge.h>
#include <linux/rtnetlink.h>
#include <linux/spinlock.h>
+#include <linux/sched/signal.h>
#include "br_private.h"
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index 62e68c0dc687..b838213c408e 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -997,10 +997,10 @@ err_vlan_add:
RCU_INIT_POINTER(p->vlgrp, NULL);
synchronize_rcu();
vlan_tunnel_deinit(vg);
-err_vlan_enabled:
err_tunnel_init:
rhashtable_destroy(&vg->vlan_hash);
err_rhtbl:
+err_vlan_enabled:
kfree(vg);
goto out;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 92cbbd2afddb..adcad344c843 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -9,7 +9,7 @@
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/module.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/list.h>
diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c
index 50f040fdb2a9..b9233b990399 100644
--- a/net/ceph/cls_lock_client.c
+++ b/net/ceph/cls_lock_client.c
@@ -69,8 +69,8 @@ int ceph_cls_lock(struct ceph_osd_client *osdc,
dout("%s lock_name %s type %d cookie %s tag %s desc %s flags 0x%x\n",
__func__, lock_name, type, cookie, tag, desc, flags);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "lock",
- CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- lock_op_page, lock_op_buf_size, NULL, NULL);
+ CEPH_OSD_FLAG_WRITE, lock_op_page,
+ lock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(lock_op_page);
@@ -117,8 +117,8 @@ int ceph_cls_unlock(struct ceph_osd_client *osdc,
dout("%s lock_name %s cookie %s\n", __func__, lock_name, cookie);
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "unlock",
- CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- unlock_op_page, unlock_op_buf_size, NULL, NULL);
+ CEPH_OSD_FLAG_WRITE, unlock_op_page,
+ unlock_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(unlock_op_page);
@@ -170,8 +170,8 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc,
dout("%s lock_name %s cookie %s locker %s%llu\n", __func__, lock_name,
cookie, ENTITY_NAME(*locker));
ret = ceph_osdc_call(osdc, oid, oloc, "lock", "break_lock",
- CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK,
- break_op_page, break_op_buf_size, NULL, NULL);
+ CEPH_OSD_FLAG_WRITE, break_op_page,
+ break_op_buf_size, NULL, NULL);
dout("%s: status %d\n", __func__, ret);
__free_page(break_op_page);
@@ -278,7 +278,7 @@ int ceph_cls_lock_info(struct ceph_osd_client *osdc,
int get_info_op_buf_size;
int name_len = strlen(lock_name);
struct page *get_info_op_page, *reply_page;
- size_t reply_len;
+ size_t reply_len = PAGE_SIZE;
void *p, *end;
int ret;
diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c
index 80d7c3a97cb8..5bf94c04f645 100644
--- a/net/ceph/crush/crush.c
+++ b/net/ceph/crush/crush.c
@@ -45,7 +45,6 @@ int crush_get_bucket_item_weight(const struct crush_bucket *b, int p)
void crush_destroy_bucket_uniform(struct crush_bucket_uniform *b)
{
- kfree(b->h.perm);
kfree(b->h.items);
kfree(b);
}
@@ -54,14 +53,12 @@ void crush_destroy_bucket_list(struct crush_bucket_list *b)
{
kfree(b->item_weights);
kfree(b->sum_weights);
- kfree(b->h.perm);
kfree(b->h.items);
kfree(b);
}
void crush_destroy_bucket_tree(struct crush_bucket_tree *b)
{
- kfree(b->h.perm);
kfree(b->h.items);
kfree(b->node_weights);
kfree(b);
@@ -71,7 +68,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
{
kfree(b->straws);
kfree(b->item_weights);
- kfree(b->h.perm);
kfree(b->h.items);
kfree(b);
}
@@ -79,7 +75,6 @@ void crush_destroy_bucket_straw(struct crush_bucket_straw *b)
void crush_destroy_bucket_straw2(struct crush_bucket_straw2 *b)
{
kfree(b->item_weights);
- kfree(b->h.perm);
kfree(b->h.items);
kfree(b);
}
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 130ab407c5ec..b5cd8c21bfdf 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -54,7 +54,6 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
return -1;
}
-
/*
* bucket choose methods
*
@@ -72,59 +71,60 @@ int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size
* Since this is expensive, we optimize for the r=0 case, which
* captures the vast majority of calls.
*/
-static int bucket_perm_choose(struct crush_bucket *bucket,
+static int bucket_perm_choose(const struct crush_bucket *bucket,
+ struct crush_work_bucket *work,
int x, int r)
{
unsigned int pr = r % bucket->size;
unsigned int i, s;
/* start a new permutation if @x has changed */
- if (bucket->perm_x != (__u32)x || bucket->perm_n == 0) {
+ if (work->perm_x != (__u32)x || work->perm_n == 0) {
dprintk("bucket %d new x=%d\n", bucket->id, x);
- bucket->perm_x = x;
+ work->perm_x = x;
/* optimize common r=0 case */
if (pr == 0) {
s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
bucket->size;
- bucket->perm[0] = s;
- bucket->perm_n = 0xffff; /* magic value, see below */
+ work->perm[0] = s;
+ work->perm_n = 0xffff; /* magic value, see below */
goto out;
}
for (i = 0; i < bucket->size; i++)
- bucket->perm[i] = i;
- bucket->perm_n = 0;
- } else if (bucket->perm_n == 0xffff) {
+ work->perm[i] = i;
+ work->perm_n = 0;
+ } else if (work->perm_n == 0xffff) {
/* clean up after the r=0 case above */
for (i = 1; i < bucket->size; i++)
- bucket->perm[i] = i;
- bucket->perm[bucket->perm[0]] = 0;
- bucket->perm_n = 1;
+ work->perm[i] = i;
+ work->perm[work->perm[0]] = 0;
+ work->perm_n = 1;
}
/* calculate permutation up to pr */
- for (i = 0; i < bucket->perm_n; i++)
- dprintk(" perm_choose have %d: %d\n", i, bucket->perm[i]);
- while (bucket->perm_n <= pr) {
- unsigned int p = bucket->perm_n;
+ for (i = 0; i < work->perm_n; i++)
+ dprintk(" perm_choose have %d: %d\n", i, work->perm[i]);
+ while (work->perm_n <= pr) {
+ unsigned int p = work->perm_n;
/* no point in swapping the final entry */
if (p < bucket->size - 1) {
i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
(bucket->size - p);
if (i) {
- unsigned int t = bucket->perm[p + i];
- bucket->perm[p + i] = bucket->perm[p];
- bucket->perm[p] = t;
+ unsigned int t = work->perm[p + i];
+ work->perm[p + i] = work->perm[p];
+ work->perm[p] = t;
}
dprintk(" perm_choose swap %d with %d\n", p, p+i);
}
- bucket->perm_n++;
+ work->perm_n++;
}
for (i = 0; i < bucket->size; i++)
- dprintk(" perm_choose %d: %d\n", i, bucket->perm[i]);
+ dprintk(" perm_choose %d: %d\n", i, work->perm[i]);
- s = bucket->perm[pr];
+ s = work->perm[pr];
out:
dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
bucket->size, x, r, pr, s);
@@ -132,14 +132,14 @@ out:
}
/* uniform */
-static int bucket_uniform_choose(struct crush_bucket_uniform *bucket,
- int x, int r)
+static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
+ struct crush_work_bucket *work, int x, int r)
{
- return bucket_perm_choose(&bucket->h, x, r);
+ return bucket_perm_choose(&bucket->h, work, x, r);
}
/* list */
-static int bucket_list_choose(struct crush_bucket_list *bucket,
+static int bucket_list_choose(const struct crush_bucket_list *bucket,
int x, int r)
{
int i;
@@ -155,8 +155,9 @@ static int bucket_list_choose(struct crush_bucket_list *bucket,
w *= bucket->sum_weights[i];
w = w >> 16;
/*dprintk(" scaled %llx\n", w);*/
- if (w < bucket->item_weights[i])
+ if (w < bucket->item_weights[i]) {
return bucket->h.items[i];
+ }
}
dprintk("bad list sums for bucket %d\n", bucket->h.id);
@@ -192,7 +193,7 @@ static int terminal(int x)
return x & 1;
}
-static int bucket_tree_choose(struct crush_bucket_tree *bucket,
+static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
int x, int r)
{
int n;
@@ -224,7 +225,7 @@ static int bucket_tree_choose(struct crush_bucket_tree *bucket,
/* straw */
-static int bucket_straw_choose(struct crush_bucket_straw *bucket,
+static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
int x, int r)
{
__u32 i;
@@ -301,7 +302,7 @@ static __u64 crush_ln(unsigned int xin)
*
*/
-static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
+static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
int x, int r)
{
unsigned int i, high = 0;
@@ -344,37 +345,42 @@ static int bucket_straw2_choose(struct crush_bucket_straw2 *bucket,
high_draw = draw;
}
}
+
return bucket->h.items[high];
}
-static int crush_bucket_choose(struct crush_bucket *in, int x, int r)
+static int crush_bucket_choose(const struct crush_bucket *in,
+ struct crush_work_bucket *work,
+ int x, int r)
{
dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
BUG_ON(in->size == 0);
switch (in->alg) {
case CRUSH_BUCKET_UNIFORM:
- return bucket_uniform_choose((struct crush_bucket_uniform *)in,
- x, r);
+ return bucket_uniform_choose(
+ (const struct crush_bucket_uniform *)in,
+ work, x, r);
case CRUSH_BUCKET_LIST:
- return bucket_list_choose((struct crush_bucket_list *)in,
+ return bucket_list_choose((const struct crush_bucket_list *)in,
x, r);
case CRUSH_BUCKET_TREE:
- return bucket_tree_choose((struct crush_bucket_tree *)in,
+ return bucket_tree_choose((const struct crush_bucket_tree *)in,
x, r);
case CRUSH_BUCKET_STRAW:
- return bucket_straw_choose((struct crush_bucket_straw *)in,
- x, r);
+ return bucket_straw_choose(
+ (const struct crush_bucket_straw *)in,
+ x, r);
case CRUSH_BUCKET_STRAW2:
- return bucket_straw2_choose((struct crush_bucket_straw2 *)in,
- x, r);
+ return bucket_straw2_choose(
+ (const struct crush_bucket_straw2 *)in,
+ x, r);
default:
dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
return in->items[0];
}
}
-
/*
* true if device is marked "out" (failed, fully offloaded)
* of the cluster
@@ -416,7 +422,8 @@ static int is_out(const struct crush_map *map,
* @parent_r: r value passed from the parent
*/
static int crush_choose_firstn(const struct crush_map *map,
- struct crush_bucket *bucket,
+ struct crush_work *work,
+ const struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int numrep, int type,
int *out, int outpos,
@@ -434,7 +441,7 @@ static int crush_choose_firstn(const struct crush_map *map,
int rep;
unsigned int ftotal, flocal;
int retry_descent, retry_bucket, skip_rep;
- struct crush_bucket *in = bucket;
+ const struct crush_bucket *in = bucket;
int r;
int i;
int item = 0;
@@ -473,9 +480,13 @@ static int crush_choose_firstn(const struct crush_map *map,
if (local_fallback_retries > 0 &&
flocal >= (in->size>>1) &&
flocal > local_fallback_retries)
- item = bucket_perm_choose(in, x, r);
+ item = bucket_perm_choose(
+ in, work->work[-1-in->id],
+ x, r);
else
- item = crush_bucket_choose(in, x, r);
+ item = crush_bucket_choose(
+ in, work->work[-1-in->id],
+ x, r);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
skip_rep = 1;
@@ -518,19 +529,21 @@ static int crush_choose_firstn(const struct crush_map *map,
sub_r = r >> (vary_r-1);
else
sub_r = 0;
- if (crush_choose_firstn(map,
- map->buckets[-1-item],
- weight, weight_max,
- x, stable ? 1 : outpos+1, 0,
- out2, outpos, count,
- recurse_tries, 0,
- local_retries,
- local_fallback_retries,
- 0,
- vary_r,
- stable,
- NULL,
- sub_r) <= outpos)
+ if (crush_choose_firstn(
+ map,
+ work,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, stable ? 1 : outpos+1, 0,
+ out2, outpos, count,
+ recurse_tries, 0,
+ local_retries,
+ local_fallback_retries,
+ 0,
+ vary_r,
+ stable,
+ NULL,
+ sub_r) <= outpos)
/* didn't get leaf */
reject = 1;
} else {
@@ -539,14 +552,12 @@ static int crush_choose_firstn(const struct crush_map *map,
}
}
- if (!reject) {
+ if (!reject && !collide) {
/* out? */
if (itemtype == 0)
reject = is_out(map, weight,
weight_max,
item, x);
- else
- reject = 0;
}
reject:
@@ -600,7 +611,8 @@ reject:
*
*/
static void crush_choose_indep(const struct crush_map *map,
- struct crush_bucket *bucket,
+ struct crush_work *work,
+ const struct crush_bucket *bucket,
const __u32 *weight, int weight_max,
int x, int left, int numrep, int type,
int *out, int outpos,
@@ -610,7 +622,7 @@ static void crush_choose_indep(const struct crush_map *map,
int *out2,
int parent_r)
{
- struct crush_bucket *in = bucket;
+ const struct crush_bucket *in = bucket;
int endpos = outpos + left;
int rep;
unsigned int ftotal;
@@ -678,7 +690,9 @@ static void crush_choose_indep(const struct crush_map *map,
break;
}
- item = crush_bucket_choose(in, x, r);
+ item = crush_bucket_choose(
+ in, work->work[-1-in->id],
+ x, r);
if (item >= map->max_devices) {
dprintk(" bad item %d\n", item);
out[rep] = CRUSH_ITEM_NONE;
@@ -724,13 +738,15 @@ static void crush_choose_indep(const struct crush_map *map,
if (recurse_to_leaf) {
if (item < 0) {
- crush_choose_indep(map,
- map->buckets[-1-item],
- weight, weight_max,
- x, 1, numrep, 0,
- out2, rep,
- recurse_tries, 0,
- 0, NULL, r);
+ crush_choose_indep(
+ map,
+ work,
+ map->buckets[-1-item],
+ weight, weight_max,
+ x, 1, numrep, 0,
+ out2, rep,
+ recurse_tries, 0,
+ 0, NULL, r);
if (out2[rep] == CRUSH_ITEM_NONE) {
/* placed nothing; no leaf */
break;
@@ -781,6 +797,53 @@ static void crush_choose_indep(const struct crush_map *map,
#endif
}
+
+/*
+ * This takes a chunk of memory and sets it up to be a shiny new
+ * working area for a CRUSH placement computation. It must be called
+ * on any newly allocated memory before passing it in to
+ * crush_do_rule. It may be used repeatedly after that, so long as the
+ * map has not changed. If the map /has/ changed, you must make sure
+ * the working size is no smaller than what was allocated and re-run
+ * crush_init_workspace.
+ *
+ * If you do retain the working space between calls to crush, make it
+ * thread-local.
+ */
+void crush_init_workspace(const struct crush_map *map, void *v)
+{
+ struct crush_work *w = v;
+ __s32 b;
+
+ /*
+ * We work by moving through the available space and setting
+ * values and pointers as we go.
+ *
+ * It's a bit like Forth's use of the 'allot' word since we
+ * set the pointer first and then reserve the space for it to
+ * point to by incrementing the point.
+ */
+ v += sizeof(struct crush_work *);
+ w->work = v;
+ v += map->max_buckets * sizeof(struct crush_work_bucket *);
+ for (b = 0; b < map->max_buckets; ++b) {
+ if (!map->buckets[b])
+ continue;
+
+ w->work[b] = v;
+ switch (map->buckets[b]->alg) {
+ default:
+ v += sizeof(struct crush_work_bucket);
+ break;
+ }
+ w->work[b]->perm_x = 0;
+ w->work[b]->perm_n = 0;
+ w->work[b]->perm = v;
+ v += map->buckets[b]->size * sizeof(__u32);
+ }
+ BUG_ON(v - (void *)w != map->working_size);
+}
+
/**
* crush_do_rule - calculate a mapping with the given input and rule
* @map: the crush_map
@@ -790,24 +853,25 @@ static void crush_choose_indep(const struct crush_map *map,
* @result_max: maximum result size
* @weight: weight vector (for map leaves)
* @weight_max: size of weight vector
- * @scratch: scratch vector for private use; must be >= 3 * result_max
+ * @cwin: pointer to at least crush_work_size() bytes of memory
*/
int crush_do_rule(const struct crush_map *map,
int ruleno, int x, int *result, int result_max,
const __u32 *weight, int weight_max,
- int *scratch)
+ void *cwin)
{
int result_len;
- int *a = scratch;
- int *b = scratch + result_max;
- int *c = scratch + result_max*2;
+ struct crush_work *cw = cwin;
+ int *a = cwin + map->working_size;
+ int *b = a + result_max;
+ int *c = b + result_max;
+ int *w = a;
+ int *o = b;
int recurse_to_leaf;
- int *w;
int wsize = 0;
- int *o;
int osize;
int *tmp;
- struct crush_rule *rule;
+ const struct crush_rule *rule;
__u32 step;
int i, j;
int numrep;
@@ -835,12 +899,10 @@ int crush_do_rule(const struct crush_map *map,
rule = map->rules[ruleno];
result_len = 0;
- w = a;
- o = b;
for (step = 0; step < rule->len; step++) {
int firstn = 0;
- struct crush_rule_step *curstep = &rule->steps[step];
+ const struct crush_rule_step *curstep = &rule->steps[step];
switch (curstep->op) {
case CRUSH_RULE_TAKE:
@@ -936,6 +998,7 @@ int crush_do_rule(const struct crush_map *map,
recurse_tries = choose_tries;
osize += crush_choose_firstn(
map,
+ cw,
map->buckets[bno],
weight, weight_max,
x, numrep,
@@ -956,6 +1019,7 @@ int crush_do_rule(const struct crush_map *map,
numrep : (result_max-osize));
crush_choose_indep(
map,
+ cw,
map->buckets[bno],
weight, weight_max,
x, out_size, numrep,
@@ -997,5 +1061,6 @@ int crush_do_rule(const struct crush_map *map,
break;
}
}
+
return result_len;
}
diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c
index 292e33bd916e..46008d5ac504 100644
--- a/net/ceph/crypto.c
+++ b/net/ceph/crypto.c
@@ -3,10 +3,12 @@
#include <linux/err.h>
#include <linux/scatterlist.h>
+#include <linux/sched.h>
#include <linux/slab.h>
#include <crypto/aes.h>
#include <crypto/skcipher.h>
#include <linux/key-type.h>
+#include <linux/sched/mm.h>
#include <keys/ceph-type.h>
#include <keys/user-type.h>
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index bad3d4ae43f6..38dcf1eb427d 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -520,7 +520,8 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
int r;
- r = kernel_recvmsg(sock, &msg, &iov, 1, len, msg.msg_flags);
+ iov_iter_kvec(&msg.msg_iter, READ | ITER_KVEC, &iov, 1, len);
+ r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
r = 0;
return r;
@@ -529,17 +530,20 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
int page_offset, size_t length)
{
- void *kaddr;
- int ret;
+ struct bio_vec bvec = {
+ .bv_page = page,
+ .bv_offset = page_offset,
+ .bv_len = length
+ };
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ int r;
BUG_ON(page_offset + length > PAGE_SIZE);
-
- kaddr = kmap(page);
- BUG_ON(!kaddr);
- ret = ceph_tcp_recvmsg(sock, kaddr + page_offset, length);
- kunmap(page);
-
- return ret;
+ iov_iter_bvec(&msg.msg_iter, READ | ITER_BVEC, &bvec, 1, length);
+ r = sock_recvmsg(sock, &msg, msg.msg_flags);
+ if (r == -EAGAIN)
+ r = 0;
+ return r;
}
/*
@@ -579,18 +583,28 @@ static int __ceph_tcp_sendpage(struct socket *sock, struct page *page,
static int ceph_tcp_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, bool more)
{
+ struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
+ struct bio_vec bvec;
int ret;
- struct kvec iov;
/* sendpage cannot properly handle pages with page_count == 0,
* we need to fallback to sendmsg if that's the case */
if (page_count(page) >= 1)
return __ceph_tcp_sendpage(sock, page, offset, size, more);
- iov.iov_base = kmap(page) + offset;
- iov.iov_len = size;
- ret = ceph_tcp_sendmsg(sock, &iov, 1, size, more);
- kunmap(page);
+ bvec.bv_page = page;
+ bvec.bv_offset = offset;
+ bvec.bv_len = size;
+
+ if (more)
+ msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR; /* superfluous, but what the hell */
+
+ iov_iter_bvec(&msg.msg_iter, WRITE | ITER_BVEC, &bvec, 1, size);
+ ret = sock_sendmsg(sock, &msg);
+ if (ret == -EAGAIN)
+ ret = 0;
return ret;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index f3378ba1a828..b65bbf9f45eb 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -460,7 +460,6 @@ static void request_init(struct ceph_osd_request *req)
kref_init(&req->r_kref);
init_completion(&req->r_completion);
- init_completion(&req->r_done_completion);
RB_CLEAR_NODE(&req->r_node);
RB_CLEAR_NODE(&req->r_mc_node);
INIT_LIST_HEAD(&req->r_unsafe_item);
@@ -672,7 +671,8 @@ void osd_req_op_extent_update(struct ceph_osd_request *osd_req,
BUG_ON(length > previous);
op->extent.length = length;
- op->indata_len -= previous - length;
+ if (op->op == CEPH_OSD_OP_WRITE || op->op == CEPH_OSD_OP_WRITEFULL)
+ op->indata_len -= previous - length;
}
EXPORT_SYMBOL(osd_req_op_extent_update);
@@ -1636,7 +1636,7 @@ static void __submit_request(struct ceph_osd_request *req, bool wrlocked)
bool need_send = false;
bool promoted = false;
- WARN_ON(req->r_tid || req->r_got_reply);
+ WARN_ON(req->r_tid);
dout("%s req %p wrlocked %d\n", __func__, req, wrlocked);
again:
@@ -1704,17 +1704,10 @@ promote:
static void account_request(struct ceph_osd_request *req)
{
- unsigned int mask = CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK;
+ WARN_ON(req->r_flags & (CEPH_OSD_FLAG_ACK | CEPH_OSD_FLAG_ONDISK));
+ WARN_ON(!(req->r_flags & (CEPH_OSD_FLAG_READ | CEPH_OSD_FLAG_WRITE)));
- if (req->r_flags & CEPH_OSD_FLAG_READ) {
- WARN_ON(req->r_flags & mask);
- req->r_flags |= CEPH_OSD_FLAG_ACK;
- } else if (req->r_flags & CEPH_OSD_FLAG_WRITE)
- WARN_ON(!(req->r_flags & mask));
- else
- WARN_ON(1);
-
- WARN_ON(req->r_unsafe_callback && (req->r_flags & mask) != mask);
+ req->r_flags |= CEPH_OSD_FLAG_ONDISK;
atomic_inc(&req->r_osdc->num_requests);
}
@@ -1749,15 +1742,15 @@ static void finish_request(struct ceph_osd_request *req)
static void __complete_request(struct ceph_osd_request *req)
{
- if (req->r_callback)
+ if (req->r_callback) {
+ dout("%s req %p tid %llu cb %pf result %d\n", __func__, req,
+ req->r_tid, req->r_callback, req->r_result);
req->r_callback(req);
- else
- complete_all(&req->r_completion);
+ }
}
/*
- * Note that this is open-coded in handle_reply(), which has to deal
- * with ack vs commit, dup acks, etc.
+ * This is open-coded in handle_reply().
*/
static void complete_request(struct ceph_osd_request *req, int err)
{
@@ -1766,7 +1759,7 @@ static void complete_request(struct ceph_osd_request *req, int err)
req->r_result = err;
finish_request(req);
__complete_request(req);
- complete_all(&req->r_done_completion);
+ complete_all(&req->r_completion);
ceph_osdc_put_request(req);
}
@@ -1792,7 +1785,7 @@ static void cancel_request(struct ceph_osd_request *req)
cancel_map_check(req);
finish_request(req);
- complete_all(&req->r_done_completion);
+ complete_all(&req->r_completion);
ceph_osdc_put_request(req);
}
@@ -2169,7 +2162,6 @@ static void linger_commit_cb(struct ceph_osd_request *req)
mutex_lock(&lreq->lock);
dout("%s lreq %p linger_id %llu result %d\n", __func__, lreq,
lreq->linger_id, req->r_result);
- WARN_ON(!__linger_registered(lreq));
linger_reg_commit_complete(lreq, req->r_result);
lreq->committed = true;
@@ -2785,31 +2777,8 @@ e_inval:
}
/*
- * We are done with @req if
- * - @m is a safe reply, or
- * - @m is an unsafe reply and we didn't want a safe one
- */
-static bool done_request(const struct ceph_osd_request *req,
- const struct MOSDOpReply *m)
-{
- return (m->result < 0 ||
- (m->flags & CEPH_OSD_FLAG_ONDISK) ||
- !(req->r_flags & CEPH_OSD_FLAG_ONDISK));
-}
-
-/*
- * handle osd op reply. either call the callback if it is specified,
- * or do the completion to wake up the waiting thread.
- *
- * ->r_unsafe_callback is set? yes no
- *
- * first reply is OK (needed r_cb/r_completion, r_cb/r_completion,
- * any or needed/got safe) r_done_completion r_done_completion
- *
- * first reply is unsafe r_unsafe_cb(true) (nothing)
- *
- * when we get the safe reply r_unsafe_cb(false), r_cb/r_completion,
- * r_done_completion r_done_completion
+ * Handle MOSDOpReply. Set ->r_result and call the callback if it is
+ * specified.
*/
static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
{
@@ -2818,7 +2787,6 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
struct MOSDOpReply m;
u64 tid = le64_to_cpu(msg->hdr.tid);
u32 data_len = 0;
- bool already_acked;
int ret;
int i;
@@ -2897,50 +2865,22 @@ static void handle_reply(struct ceph_osd *osd, struct ceph_msg *msg)
le32_to_cpu(msg->hdr.data_len), req->r_tid);
goto fail_request;
}
- dout("%s req %p tid %llu acked %d result %d data_len %u\n", __func__,
- req, req->r_tid, req->r_got_reply, m.result, data_len);
-
- already_acked = req->r_got_reply;
- if (!already_acked) {
- req->r_result = m.result ?: data_len;
- req->r_replay_version = m.replay_version; /* struct */
- req->r_got_reply = true;
- } else if (!(m.flags & CEPH_OSD_FLAG_ONDISK)) {
- dout("req %p tid %llu dup ack\n", req, req->r_tid);
- goto out_unlock_session;
- }
-
- if (done_request(req, &m)) {
- finish_request(req);
- if (req->r_linger) {
- WARN_ON(req->r_unsafe_callback);
- dout("req %p tid %llu cb (locked)\n", req, req->r_tid);
- __complete_request(req);
- }
- }
+ dout("%s req %p tid %llu result %d data_len %u\n", __func__,
+ req, req->r_tid, m.result, data_len);
+ /*
+ * Since we only ever request ONDISK, we should only ever get
+ * one (type of) reply back.
+ */
+ WARN_ON(!(m.flags & CEPH_OSD_FLAG_ONDISK));
+ req->r_result = m.result ?: data_len;
+ finish_request(req);
mutex_unlock(&osd->lock);
up_read(&osdc->lock);
- if (done_request(req, &m)) {
- if (already_acked && req->r_unsafe_callback) {
- dout("req %p tid %llu safe-cb\n", req, req->r_tid);
- req->r_unsafe_callback(req, false);
- } else if (!req->r_linger) {
- dout("req %p tid %llu cb\n", req, req->r_tid);
- __complete_request(req);
- }
- complete_all(&req->r_done_completion);
- ceph_osdc_put_request(req);
- } else {
- if (req->r_unsafe_callback) {
- dout("req %p tid %llu unsafe-cb\n", req, req->r_tid);
- req->r_unsafe_callback(req, true);
- } else {
- WARN_ON(1);
- }
- }
-
+ __complete_request(req);
+ complete_all(&req->r_completion);
+ ceph_osdc_put_request(req);
return;
fail_request:
@@ -3540,7 +3480,7 @@ again:
up_read(&osdc->lock);
dout("%s waiting on req %p tid %llu last_tid %llu\n",
__func__, req, req->r_tid, last_tid);
- wait_for_completion(&req->r_done_completion);
+ wait_for_completion(&req->r_completion);
ceph_osdc_put_request(req);
goto again;
}
@@ -3599,7 +3539,7 @@ ceph_osdc_watch(struct ceph_osd_client *osdc,
ceph_oid_copy(&lreq->t.base_oid, oid);
ceph_oloc_copy(&lreq->t.base_oloc, oloc);
- lreq->t.flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ lreq->t.flags = CEPH_OSD_FLAG_WRITE;
lreq->mtime = CURRENT_TIME;
lreq->reg_req = alloc_linger_request(lreq);
@@ -3657,7 +3597,7 @@ int ceph_osdc_unwatch(struct ceph_osd_client *osdc,
ceph_oid_copy(&req->r_base_oid, &lreq->t.base_oid);
ceph_oloc_copy(&req->r_base_oloc, &lreq->t.base_oloc);
- req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ONDISK;
+ req->r_flags = CEPH_OSD_FLAG_WRITE;
req->r_mtime = CURRENT_TIME;
osd_req_op_watch_init(req, 0, lreq->linger_id,
CEPH_OSD_WATCH_OP_UNWATCH);
@@ -4022,7 +3962,7 @@ EXPORT_SYMBOL(ceph_osdc_maybe_request_map);
* Execute an OSD class method on an object.
*
* @flags: CEPH_OSD_FLAG_*
- * @resp_len: out param for reply length
+ * @resp_len: in/out param for reply length
*/
int ceph_osdc_call(struct ceph_osd_client *osdc,
struct ceph_object_id *oid,
@@ -4035,6 +3975,9 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
struct ceph_osd_request *req;
int ret;
+ if (req_len > PAGE_SIZE || (resp_page && *resp_len > PAGE_SIZE))
+ return -E2BIG;
+
req = ceph_osdc_alloc_request(osdc, NULL, 1, false, GFP_NOIO);
if (!req)
return -ENOMEM;
@@ -4053,7 +3996,7 @@ int ceph_osdc_call(struct ceph_osd_client *osdc,
0, false, false);
if (resp_page)
osd_req_op_cls_response_data_pages(req, 0, &resp_page,
- PAGE_SIZE, 0, false, false);
+ *resp_len, 0, false, false);
ceph_osdc_start_request(osdc, req, false);
ret = ceph_osdc_wait_request(osdc, req);
@@ -4220,8 +4163,7 @@ int ceph_osdc_writepages(struct ceph_osd_client *osdc, struct ceph_vino vino,
int page_align = off & ~PAGE_MASK;
req = ceph_osdc_new_request(osdc, layout, vino, off, &len, 0, 1,
- CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
+ CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq, truncate_size,
true);
if (IS_ERR(req))
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index d2436880b305..6824c0ec8373 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -153,6 +153,32 @@ bad:
return -EINVAL;
}
+static void crush_finalize(struct crush_map *c)
+{
+ __s32 b;
+
+ /* Space for the array of pointers to per-bucket workspace */
+ c->working_size = sizeof(struct crush_work) +
+ c->max_buckets * sizeof(struct crush_work_bucket *);
+
+ for (b = 0; b < c->max_buckets; b++) {
+ if (!c->buckets[b])
+ continue;
+
+ switch (c->buckets[b]->alg) {
+ default:
+ /*
+ * The base case, permutation variables and
+ * the pointer to the permutation array.
+ */
+ c->working_size += sizeof(struct crush_work_bucket);
+ break;
+ }
+ /* Every bucket has a permutation array. */
+ c->working_size += c->buckets[b]->size * sizeof(__u32);
+ }
+}
+
static struct crush_map *crush_decode(void *pbyval, void *end)
{
struct crush_map *c;
@@ -246,10 +272,6 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
if (b->items == NULL)
goto badmem;
- b->perm = kcalloc(b->size, sizeof(u32), GFP_NOFS);
- if (b->perm == NULL)
- goto badmem;
- b->perm_n = 0;
ceph_decode_need(p, end, b->size*sizeof(u32), bad);
for (j = 0; j < b->size; j++)
@@ -368,6 +390,8 @@ static struct crush_map *crush_decode(void *pbyval, void *end)
dout("crush decode tunable chooseleaf_stable = %d\n",
c->chooseleaf_stable);
+ crush_finalize(c);
+
done:
dout("crush_decode success\n");
return c;
@@ -719,7 +743,7 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
map->pool_max = -1;
map->pg_temp = RB_ROOT;
map->primary_temp = RB_ROOT;
- mutex_init(&map->crush_scratch_mutex);
+ mutex_init(&map->crush_workspace_mutex);
return map;
}
@@ -753,6 +777,7 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
kfree(map->osd_weight);
kfree(map->osd_addr);
kfree(map->osd_primary_affinity);
+ kfree(map->crush_workspace);
kfree(map);
}
@@ -808,6 +833,31 @@ static int osdmap_set_max_osd(struct ceph_osdmap *map, int max)
return 0;
}
+static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
+{
+ void *workspace;
+ size_t work_size;
+
+ if (IS_ERR(crush))
+ return PTR_ERR(crush);
+
+ work_size = crush_work_size(crush, CEPH_PG_MAX_SIZE);
+ dout("%s work_size %zu bytes\n", __func__, work_size);
+ workspace = kmalloc(work_size, GFP_NOIO);
+ if (!workspace) {
+ crush_destroy(crush);
+ return -ENOMEM;
+ }
+ crush_init_workspace(crush, workspace);
+
+ if (map->crush)
+ crush_destroy(map->crush);
+ kfree(map->crush_workspace);
+ map->crush = crush;
+ map->crush_workspace = workspace;
+ return 0;
+}
+
#define OSDMAP_WRAPPER_COMPAT_VER 7
#define OSDMAP_CLIENT_DATA_COMPAT_VER 1
@@ -1214,13 +1264,9 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
/* crush */
ceph_decode_32_safe(p, end, len, e_inval);
- map->crush = crush_decode(*p, min(*p + len, end));
- if (IS_ERR(map->crush)) {
- err = PTR_ERR(map->crush);
- map->crush = NULL;
+ err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
+ if (err)
goto bad;
- }
- *p += len;
/* ignore the rest */
*p = end;
@@ -1375,7 +1421,6 @@ e_inval:
struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
struct ceph_osdmap *map)
{
- struct crush_map *newcrush = NULL;
struct ceph_fsid fsid;
u32 epoch = 0;
struct ceph_timespec modified;
@@ -1414,12 +1459,10 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
/* new crush? */
ceph_decode_32_safe(p, end, len, e_inval);
if (len > 0) {
- newcrush = crush_decode(*p, min(*p+len, end));
- if (IS_ERR(newcrush)) {
- err = PTR_ERR(newcrush);
- newcrush = NULL;
+ err = osdmap_set_crush(map,
+ crush_decode(*p, min(*p + len, end)));
+ if (err)
goto bad;
- }
*p += len;
}
@@ -1439,12 +1482,6 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
map->epoch++;
map->modified = modified;
- if (newcrush) {
- if (map->crush)
- crush_destroy(map->crush);
- map->crush = newcrush;
- newcrush = NULL;
- }
/* new_pools */
err = decode_new_pools(p, end, map);
@@ -1505,8 +1542,6 @@ bad:
print_hex_dump(KERN_DEBUG, "osdmap: ",
DUMP_PREFIX_OFFSET, 16, 1,
start, end - start, true);
- if (newcrush)
- crush_destroy(newcrush);
return ERR_PTR(err);
}
@@ -1942,10 +1977,10 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
BUG_ON(result_max > CEPH_PG_MAX_SIZE);
- mutex_lock(&map->crush_scratch_mutex);
+ mutex_lock(&map->crush_workspace_mutex);
r = crush_do_rule(map->crush, ruleno, x, result, result_max,
- weight, weight_max, map->crush_scratch_ary);
- mutex_unlock(&map->crush_scratch_mutex);
+ weight, weight_max, map->crush_workspace);
+ mutex_unlock(&map->crush_workspace_mutex);
return r;
}
@@ -1978,8 +2013,14 @@ static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
return;
}
- len = do_crush(osdmap, ruleno, pps, raw->osds,
- min_t(int, pi->size, ARRAY_SIZE(raw->osds)),
+ if (pi->size > ARRAY_SIZE(raw->osds)) {
+ pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
+ pi->id, pi->crush_ruleset, pi->type, pi->size,
+ ARRAY_SIZE(raw->osds));
+ return;
+ }
+
+ len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
osdmap->osd_weight, osdmap->max_osd);
if (len < 0) {
pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
diff --git a/net/ceph/snapshot.c b/net/ceph/snapshot.c
index 154683f5f14c..705414e78ae0 100644
--- a/net/ceph/snapshot.c
+++ b/net/ceph/snapshot.c
@@ -18,8 +18,6 @@
* 02110-1301, USA.
*/
-#include <stddef.h>
-
#include <linux/types.h>
#include <linux/export.h>
#include <linux/ceph/libceph.h>
diff --git a/net/core/dev.c b/net/core/dev.c
index 304f2deae5f9..8637b2b71f3d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1698,27 +1698,54 @@ EXPORT_SYMBOL_GPL(net_dec_egress_queue);
static struct static_key netstamp_needed __read_mostly;
#ifdef HAVE_JUMP_LABEL
static atomic_t netstamp_needed_deferred;
+static atomic_t netstamp_wanted;
static void netstamp_clear(struct work_struct *work)
{
int deferred = atomic_xchg(&netstamp_needed_deferred, 0);
+ int wanted;
- while (deferred--)
- static_key_slow_dec(&netstamp_needed);
+ wanted = atomic_add_return(deferred, &netstamp_wanted);
+ if (wanted > 0)
+ static_key_enable(&netstamp_needed);
+ else
+ static_key_disable(&netstamp_needed);
}
static DECLARE_WORK(netstamp_work, netstamp_clear);
#endif
void net_enable_timestamp(void)
{
+#ifdef HAVE_JUMP_LABEL
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 0)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted + 1) == wanted)
+ return;
+ }
+ atomic_inc(&netstamp_needed_deferred);
+ schedule_work(&netstamp_work);
+#else
static_key_slow_inc(&netstamp_needed);
+#endif
}
EXPORT_SYMBOL(net_enable_timestamp);
void net_disable_timestamp(void)
{
#ifdef HAVE_JUMP_LABEL
- /* net_disable_timestamp() can be called from non process context */
- atomic_inc(&netstamp_needed_deferred);
+ int wanted;
+
+ while (1) {
+ wanted = atomic_read(&netstamp_wanted);
+ if (wanted <= 1)
+ break;
+ if (atomic_cmpxchg(&netstamp_wanted, wanted, wanted - 1) == wanted)
+ return;
+ }
+ atomic_dec(&netstamp_needed_deferred);
schedule_work(&netstamp_work);
#else
static_key_slow_dec(&netstamp_needed);
@@ -4884,6 +4911,39 @@ void __napi_schedule(struct napi_struct *n)
EXPORT_SYMBOL(__napi_schedule);
/**
+ * napi_schedule_prep - check if napi can be scheduled
+ * @n: napi context
+ *
+ * Test if NAPI routine is already running, and if not mark
+ * it as running. This is used as a condition variable
+ * insure only one NAPI poll instance runs. We also make
+ * sure there is no pending NAPI disable.
+ */
+bool napi_schedule_prep(struct napi_struct *n)
+{
+ unsigned long val, new;
+
+ do {
+ val = READ_ONCE(n->state);
+ if (unlikely(val & NAPIF_STATE_DISABLE))
+ return false;
+ new = val | NAPIF_STATE_SCHED;
+
+ /* Sets STATE_MISSED bit if STATE_SCHED was already set
+ * This was suggested by Alexander Duyck, as compiler
+ * emits better code than :
+ * if (val & NAPIF_STATE_SCHED)
+ * new |= NAPIF_STATE_MISSED;
+ */
+ new |= (val & NAPIF_STATE_SCHED) / NAPIF_STATE_SCHED *
+ NAPIF_STATE_MISSED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ return !(val & NAPIF_STATE_SCHED);
+}
+EXPORT_SYMBOL(napi_schedule_prep);
+
+/**
* __napi_schedule_irqoff - schedule for receive
* @n: entry to schedule
*
@@ -4897,7 +4957,7 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
bool napi_complete_done(struct napi_struct *n, int work_done)
{
- unsigned long flags;
+ unsigned long flags, val, new;
/*
* 1) Don't let napi dequeue from the cpu poll list
@@ -4927,7 +4987,27 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
list_del_init(&n->poll_list);
local_irq_restore(flags);
}
- WARN_ON_ONCE(!test_and_clear_bit(NAPI_STATE_SCHED, &n->state));
+
+ do {
+ val = READ_ONCE(n->state);
+
+ WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
+
+ new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED);
+
+ /* If STATE_MISSED was set, leave STATE_SCHED set,
+ * because we will call napi->poll() one more time.
+ * This C code was suggested by Alexander Duyck to help gcc.
+ */
+ new |= (val & NAPIF_STATE_MISSED) / NAPIF_STATE_MISSED *
+ NAPIF_STATE_SCHED;
+ } while (cmpxchg(&n->state, val, new) != val);
+
+ if (unlikely(val & NAPIF_STATE_MISSED)) {
+ __napi_schedule(n);
+ return false;
+ }
+
return true;
}
EXPORT_SYMBOL(napi_complete_done);
@@ -4953,6 +5033,16 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
{
int rc;
+ /* Busy polling means there is a high chance device driver hard irq
+ * could not grab NAPI_STATE_SCHED, and that NAPI_STATE_MISSED was
+ * set in napi_schedule_prep().
+ * Since we are about to call napi->poll() once more, we can safely
+ * clear NAPI_STATE_MISSED.
+ *
+ * Note: x86 could use a single "lock and ..." instruction
+ * to perform these two clear_bit()
+ */
+ clear_bit(NAPI_STATE_MISSED, &napi->state);
clear_bit(NAPI_STATE_IN_BUSY_POLL, &napi->state);
local_bh_disable();
@@ -5088,8 +5178,13 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
struct napi_struct *napi;
napi = container_of(timer, struct napi_struct, timer);
- if (napi->gro_list)
- napi_schedule_irqoff(napi);
+
+ /* Note : we use a relaxed variant of napi_schedule_prep() not setting
+ * NAPI_STATE_MISSED, since we do not react to a device IRQ.
+ */
+ if (napi->gro_list && !napi_disable_pending(napi) &&
+ !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
+ __napi_schedule_irqoff(napi);
return HRTIMER_NORESTART;
}
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index be7bab1adcde..aecb2c7241b6 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -24,7 +24,7 @@
#include <linux/vmalloc.h>
#include <linux/slab.h>
#include <linux/rtnetlink.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/net.h>
/*
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b0c04cf4851d..3945821e9c1f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -15,6 +15,7 @@
#include <net/switchdev.h>
#include <linux/if_arp.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include <linux/nsproxy.h>
#include <net/sock.h>
#include <net/net_namespace.h>
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 3c4bbec39713..652468ff65b7 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -16,6 +16,8 @@
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
+#include <linux/sched/task.h>
+
#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c
index 11fce17274f6..6ae56037bb13 100644
--- a/net/core/netclassid_cgroup.c
+++ b/net/core/netclassid_cgroup.c
@@ -12,6 +12,8 @@
#include <linux/slab.h>
#include <linux/cgroup.h>
#include <linux/fdtable.h>
+#include <linux/sched/task.h>
+
#include <net/cls_cgroup.h>
#include <net/sock.h>
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
index 756637dc7a57..0f9275ee5595 100644
--- a/net/core/netprio_cgroup.c
+++ b/net/core/netprio_cgroup.c
@@ -20,6 +20,8 @@
#include <linux/cgroup.h>
#include <linux/rcupdate.h>
#include <linux/atomic.h>
+#include <linux/sched/task.h>
+
#include <net/rtnetlink.h>
#include <net/pkt_cls.h>
#include <net/sock.h>
diff --git a/net/core/scm.c b/net/core/scm.c
index b6d83686e149..b1ff8a441748 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -14,6 +14,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/sched.h>
+#include <linux/sched/user.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/stat.h>
diff --git a/net/core/sock.c b/net/core/sock.c
index e7d74940e863..f6fd79f33097 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1539,11 +1539,7 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
is_charged = sk_filter_charge(newsk, filter);
if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- bh_unlock_sock(newsk);
- sk_free(newsk);
+ sk_free_unlock_clone(newsk);
newsk = NULL;
goto out;
}
@@ -1592,6 +1588,16 @@ out:
}
EXPORT_SYMBOL_GPL(sk_clone_lock);
+void sk_free_unlock_clone(struct sock *sk)
+{
+ /* It is still raw copy of parent, so invalidate
+ * destructor and make plain sk_free() */
+ sk->sk_destruct = NULL;
+ bh_unlock_sock(sk);
+ sk_free(sk);
+}
+EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
diff --git a/net/core/stream.c b/net/core/stream.c
index f575bcf64af2..20231dbb1da0 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -13,6 +13,7 @@
*/
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/net.h>
#include <linux/signal.h>
#include <linux/tcp.h>
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 8fedc2d49770..4a05d7876850 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -577,6 +577,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const int old_state = sk->sk_state;
+ bool acceptable;
int queued = 0;
/*
@@ -603,8 +604,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (sk->sk_state == DCCP_LISTEN) {
if (dh->dccph_type == DCCP_PKT_REQUEST) {
- if (inet_csk(sk)->icsk_af_ops->conn_request(sk,
- skb) < 0)
+ /* It is possible that we process SYN packets from backlog,
+ * so we need to make sure to disable BH right there.
+ */
+ local_bh_disable();
+ acceptable = inet_csk(sk)->icsk_af_ops->conn_request(sk, skb) >= 0;
+ local_bh_enable();
+ if (!acceptable)
return 1;
consume_skb(skb);
return 0;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 53eddf99e4f6..e267e6f4c9a5 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -119,10 +119,7 @@ struct sock *dccp_create_openreq_child(const struct sock *sk,
* Activate features: initialise CCIDs, sequence windows etc.
*/
if (dccp_feat_activate_values(newsk, &dreq->dreq_featneg)) {
- /* It is still raw copy of parent, so invalidate
- * destructor and make plain sk_free() */
- newsk->sk_destruct = NULL;
- sk_free(newsk);
+ sk_free_unlock_clone(newsk);
return NULL;
}
dccp_init_xmit_timers(newsk);
diff --git a/net/dccp/output.c b/net/dccp/output.c
index b66c84db0766..91a15b3c4915 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -14,6 +14,7 @@
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include <net/inet_sock.h>
#include <net/sock.h>
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index a90ed67027b0..e6e79eda9763 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index ecc28cff08ab..af781010753b 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -37,8 +37,10 @@
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/cred.h>
#include <linux/dns_resolver.h>
#include <linux/err.h>
+
#include <keys/dns_resolver-type.h>
#include <keys/user-type.h>
@@ -70,7 +72,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
const char *options, char **_result, time64_t *_expiry)
{
struct key *rkey;
- const struct user_key_payload *upayload;
+ struct user_key_payload *upayload;
const struct cred *saved_cred;
size_t typelen, desclen;
char *desc, *cp;
@@ -141,7 +143,7 @@ int dns_query(const char *type, const char *name, size_t namelen,
if (ret)
goto put;
- upayload = user_key_payload(rkey);
+ upayload = user_key_payload_locked(rkey);
len = upayload->datalen;
ret = -ENOMEM;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 5d367b7ff542..cebedd545e5e 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -32,6 +32,7 @@
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index b39a791f6756..42bfd08109dd 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -622,6 +622,7 @@ const struct nla_policy rtm_ipv4_policy[RTA_MAX + 1] = {
[RTA_ENCAP_TYPE] = { .type = NLA_U16 },
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_UID] = { .type = NLA_U32 },
+ [RTA_MARK] = { .type = NLA_U32 },
};
static int rtm_to_fib_config(struct net *net, struct sk_buff *skb,
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index b3cc1335adbc..c0cc6aa8cfaa 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -23,7 +23,8 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
struct rtable *rt;
struct flowi4 fl4 = {};
__be32 saddr = iph->saddr;
- __u8 flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0;
+ const struct sock *sk = skb_to_full_sk(skb);
+ __u8 flags = sk ? inet_sk_flowi_flags(sk) : 0;
struct net_device *dev = skb_dst(skb)->dev;
unsigned int hh_len;
@@ -40,7 +41,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
fl4.daddr = iph->daddr;
fl4.saddr = saddr;
fl4.flowi4_tos = RT_TOS(iph->tos);
- fl4.flowi4_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0;
+ fl4.flowi4_oif = sk ? sk->sk_bound_dev_if : 0;
if (!fl4.flowi4_oif)
fl4.flowi4_oif = l3mdev_master_ifindex(dev);
fl4.flowi4_mark = skb->mark;
@@ -61,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned int addr_t
xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) {
struct dst_entry *dst = skb_dst(skb);
skb_dst_set(skb, NULL);
- dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), skb->sk, 0);
+ dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0);
if (IS_ERR(dst))
return PTR_ERR(dst);
skb_dst_set(skb, dst);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index da385ae997a3..cf4555581282 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1110,9 +1110,14 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags, 1);
- inet->defer_connect = 0;
- *copied = tp->fastopen_req->copied;
- tcp_free_fastopen_req(tp);
+ /* fastopen_req could already be freed in __inet_stream_connect
+ * if the connection times out or gets rst
+ */
+ if (tp->fastopen_req) {
+ *copied = tp->fastopen_req->copied;
+ tcp_free_fastopen_req(tp);
+ inet->defer_connect = 0;
+ }
return err;
}
@@ -2318,6 +2323,10 @@ int tcp_disconnect(struct sock *sk, int flags)
memset(&tp->rx_opt, 0, sizeof(tp->rx_opt));
__sk_dst_reset(sk);
+ /* Clean up fastopen related fields */
+ tcp_free_fastopen_req(tp);
+ inet->defer_connect = 0;
+
WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
sk->sk_error_report(sk);
diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c
index 35b280361cb2..50a0f3e51d5b 100644
--- a/net/ipv4/tcp_cdg.c
+++ b/net/ipv4/tcp_cdg.c
@@ -27,6 +27,8 @@
#include <linux/kernel.h>
#include <linux/random.h>
#include <linux/module.h>
+#include <linux/sched/clock.h>
+
#include <net/tcp.h>
#define HYSTART_ACK_TRAIN 1
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2c0ff327b6df..39c393cc0fd3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5886,9 +5886,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)
if (th->syn) {
if (th->fin)
goto discard;
- if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
- return 1;
+ /* It is possible that we process SYN packets from backlog,
+ * so we need to make sure to disable BH right there.
+ */
+ local_bh_disable();
+ acceptable = icsk->icsk_af_ops->conn_request(sk, skb) >= 0;
+ local_bh_enable();
+ if (!acceptable)
+ return 1;
consume_skb(skb);
return 0;
}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3a2025f5bf2c..363172527e43 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -43,6 +43,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
@@ -5692,13 +5693,18 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1;
struct net *net = (struct net *)ctl->extra2;
+ if (!rtnl_trylock())
+ return restart_syscall();
+
ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
if (write) {
new_val = *((int *)ctl->data);
- if (check_addr_gen_mode(new_val) < 0)
- return -EINVAL;
+ if (check_addr_gen_mode(new_val) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
/* request for default */
if (&net->ipv6.devconf_dflt->addr_gen_mode == ctl->data) {
@@ -5707,20 +5713,23 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
/* request for individual net device */
} else {
if (!idev)
- return ret;
+ goto out;
- if (check_stable_privacy(idev, net, new_val) < 0)
- return -EINVAL;
+ if (check_stable_privacy(idev, net, new_val) < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- rtnl_lock();
addrconf_dev_config(idev->dev);
- rtnl_unlock();
}
}
}
+out:
+ rtnl_unlock();
+
return ret;
}
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 9948b5ce52da..986d4ca38832 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -589,6 +589,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
hdr = ipv6_hdr(skb);
fhdr = (struct frag_hdr *)skb_transport_header(skb);
+ skb_orphan(skb);
fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
if (fq == NULL) {
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index f54f4265b37f..229bfcc451ef 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2169,10 +2169,13 @@ int ip6_del_rt(struct rt6_info *rt)
static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
{
struct nl_info *info = &cfg->fc_nlinfo;
+ struct net *net = info->nl_net;
struct sk_buff *skb = NULL;
struct fib6_table *table;
- int err;
+ int err = -ENOENT;
+ if (rt == net->ipv6.ip6_null_entry)
+ goto out_put;
table = rt->rt6i_table;
write_lock_bh(&table->tb6_lock);
@@ -2184,7 +2187,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
if (skb) {
u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
- if (rt6_fill_node(info->nl_net, skb, rt,
+ if (rt6_fill_node(net, skb, rt,
NULL, NULL, 0, RTM_DELROUTE,
info->portid, seq, 0) < 0) {
kfree_skb(skb);
@@ -2198,17 +2201,18 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg)
rt6i_siblings) {
err = fib6_del(sibling, info);
if (err)
- goto out;
+ goto out_unlock;
}
}
err = fib6_del(rt, info);
-out:
+out_unlock:
write_unlock_bh(&table->tb6_lock);
+out_put:
ip6_rt_put(rt);
if (skb) {
- rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV6_ROUTE,
+ rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE,
info->nlh, gfp_any());
}
return err;
@@ -2891,6 +2895,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
[RTA_ENCAP] = { .type = NLA_NESTED },
[RTA_EXPIRES] = { .type = NLA_U32 },
[RTA_UID] = { .type = NLA_U32 },
+ [RTA_MARK] = { .type = NLA_U32 },
};
static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -3627,6 +3632,12 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh)
rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6);
}
+ if (rt == net->ipv6.ip6_null_entry) {
+ err = rt->dst.error;
+ ip6_rt_put(rt);
+ goto errout;
+ }
+
skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
if (!skb) {
ip6_rt_put(rt);
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index ab254041dab7..81adc29a448d 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -46,6 +46,7 @@
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
#include <linux/init.h>
#include <linux/net.h>
#include <linux/irda.h>
diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c
index 817b1b186aff..f6061c4bb0a8 100644
--- a/net/irda/ircomm/ircomm_tty.c
+++ b/net/irda/ircomm/ircomm_tty.c
@@ -32,7 +32,7 @@
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/seq_file.h>
#include <linux/termios.h>
#include <linux/tty.h>
diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c
index 35dbf3dc3d28..7025dcb853d0 100644
--- a/net/irda/irnet/irnet_ppp.c
+++ b/net/irda/irnet/irnet_ppp.c
@@ -13,8 +13,9 @@
* 2) as a control channel (write commands, read events)
*/
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
+
#include "irnet_ppp.h" /* Private header */
/* Please put other headers in irnet.h - Thanks */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 13190b38f22e..89bbde1081ce 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -17,7 +17,7 @@
#include <linux/list.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/init.h>
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index a646f3481240..309062f3debe 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -24,6 +24,8 @@
#include <linux/uaccess.h>
#include <linux/workqueue.h>
#include <linux/syscalls.h>
+#include <linux/sched/signal.h>
+
#include <net/kcm.h>
#include <net/netns/generic.h>
#include <net/sock.h>
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 5e9296382420..06186d608a27 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -26,6 +26,8 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/sched/signal.h>
+
#include <net/llc.h>
#include <net/llc_sap.h>
#include <net/llc_pdu.h>
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 3b5fd4188f2a..4456559cb056 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -85,7 +85,7 @@ void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
ht_dbg(sta->sdata,
"Rx BA session stop requested for %pM tid %u %s reason: %d\n",
sta->sta.addr, tid,
- initiator == WLAN_BACK_RECIPIENT ? "recipient" : "inititator",
+ initiator == WLAN_BACK_RECIPIENT ? "recipient" : "initiator",
(int)reason);
if (drv_ampdu_action(local, sta->sdata, &params))
@@ -398,6 +398,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta,
tid_agg_rx->timeout = timeout;
tid_agg_rx->stored_mpdu_num = 0;
tid_agg_rx->auto_seq = auto_seq;
+ tid_agg_rx->started = false;
tid_agg_rx->reorder_buf_filtered = 0;
status = WLAN_STATUS_SUCCESS;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 159a1a733725..0e718437d080 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -428,7 +428,7 @@ struct ieee80211_sta_tx_tspec {
bool downgraded;
};
-DECLARE_EWMA(beacon_signal, 16, 4)
+DECLARE_EWMA(beacon_signal, 4, 4)
struct ieee80211_if_managed {
struct timer_list timer;
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index fcba70e57073..953d71e784a9 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -9,6 +9,8 @@
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/random.h>
+#include <linux/rculist.h>
+
#include "ieee80211_i.h"
#include "rate.h"
#include "mesh.h"
diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c
index 28a3a0957c9e..76a8bcd8ef11 100644
--- a/net/mac80211/pm.c
+++ b/net/mac80211/pm.c
@@ -168,6 +168,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan)
break;
}
+ flush_delayed_work(&sdata->dec_tailroom_needed_wk);
drv_remove_interface(local, sdata);
}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 50ca3828b124..e48724a6725e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007-2010 Johannes Berg <johannes@sipsolutions.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
- * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
+ * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -1034,6 +1034,18 @@ static bool ieee80211_sta_manage_reorder_buf(struct ieee80211_sub_if_data *sdata
buf_size = tid_agg_rx->buf_size;
head_seq_num = tid_agg_rx->head_seq_num;
+ /*
+ * If the current MPDU's SN is smaller than the SSN, it shouldn't
+ * be reordered.
+ */
+ if (unlikely(!tid_agg_rx->started)) {
+ if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
+ ret = false;
+ goto out;
+ }
+ tid_agg_rx->started = true;
+ }
+
/* frame with out of date sequence number */
if (ieee80211_sn_less(mpdu_seq_num, head_seq_num)) {
dev_kfree_skb(skb);
@@ -3880,6 +3892,7 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
stats->last_rate = sta_stats_encode_rate(status);
stats->fragments++;
+ stats->packets++;
if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) {
stats->last_signal = status->signal;
@@ -4073,15 +4086,17 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
ieee80211_is_beacon(hdr->frame_control)))
ieee80211_scan_rx(local, skb);
- if (pubsta) {
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
- if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
- return;
- goto out;
- } else if (ieee80211_is_data(fc)) {
+ if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
+ if (pubsta) {
+ rx.sta = container_of(pubsta, struct sta_info, sta);
+ rx.sdata = rx.sta->sdata;
+ if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
+ return;
+ goto out;
+ }
+
prev_sta = NULL;
for_each_sta_info(local, hdr->addr2, sta, tmp) {
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 4774e663a411..3323a2fb289b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -688,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending)
}
/* No need to do anything if the driver does all */
- if (ieee80211_hw_check(&local->hw, AP_LINK_PS))
+ if (ieee80211_hw_check(&local->hw, AP_LINK_PS) && !local->ops->set_tim)
return;
if (sta->dead)
@@ -1264,7 +1264,7 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta)
sta_info_recalc_tim(sta);
ps_dbg(sdata,
- "STA %pM aid %d sending %d filtered/%d PS frames since STA not sleeping anymore\n",
+ "STA %pM aid %d sending %d filtered/%d PS frames since STA woke up\n",
sta->sta.addr, sta->sta.aid, filtered, buffered);
ieee80211_check_fast_xmit(sta);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dd06ef0b8861..e65cda34d2bc 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -189,6 +189,7 @@ struct tid_ampdu_tx {
* @auto_seq: used for offloaded BA sessions to automatically pick head_seq_and
* and ssn.
* @removed: this session is removed (but might have been found due to RCU)
+ * @started: this session has started (head ssn or higher was received)
*
* This structure's lifetime is managed by RCU, assignments to
* the array holding it must hold the aggregation mutex.
@@ -212,8 +213,9 @@ struct tid_ampdu_rx {
u16 ssn;
u16 buf_size;
u16 timeout;
- bool auto_seq;
- bool removed;
+ u8 auto_seq:1,
+ removed:1,
+ started:1;
};
/**
@@ -370,7 +372,7 @@ struct mesh_sta {
unsigned int fail_avg;
};
-DECLARE_EWMA(signal, 1024, 8)
+DECLARE_EWMA(signal, 10, 8)
struct ieee80211_sta_rx_stats {
unsigned long packets;
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index a3af6e1bfd98..83b8b11f24ea 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -51,7 +51,8 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local,
struct ieee80211_hdr *hdr = (void *)skb->data;
int ac;
- if (info->flags & IEEE80211_TX_CTL_NO_PS_BUFFER) {
+ if (info->flags & (IEEE80211_TX_CTL_NO_PS_BUFFER |
+ IEEE80211_TX_CTL_AMPDU)) {
ieee80211_free_txskb(&local->hw, skb);
return;
}
@@ -462,9 +463,7 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
unsigned long flags;
spin_lock_irqsave(&local->ack_status_lock, flags);
- skb = idr_find(&local->ack_status_frames, info->ack_frame_id);
- if (skb)
- idr_remove(&local->ack_status_frames, info->ack_frame_id);
+ skb = idr_remove(&local->ack_status_frames, info->ack_frame_id);
spin_unlock_irqrestore(&local->ack_status_lock, flags);
if (!skb)
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 6a3e1c2181d3..1e1c9b20bab7 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -18,6 +18,8 @@
#include <linux/bug.h>
#include <linux/completion.h>
#include <linux/ieee802154.h>
+#include <linux/rculist.h>
+
#include <crypto/aead.h>
#include <crypto/skcipher.h>
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 24174c520239..0d17894798b5 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -1628,8 +1628,6 @@ static int __init nf_conntrack_sip_init(void)
ports[ports_c++] = SIP_PORT;
for (i = 0; i < ports_c; i++) {
- memset(&sip[i], 0, sizeof(sip[i]));
-
nf_ct_helper_init(&sip[4 * i], AF_INET, IPPROTO_UDP, "sip",
SIP_PORT, ports[i], i, sip_exp_policy,
SIP_EXPECT_MAX,
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ff7304ae58ac..5e0ccfd5bb37 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -461,16 +461,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
+static void nf_tables_table_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
@@ -482,14 +481,11 @@ static int nf_tables_table_notify(const struct nft_ctx *ctx, int event)
goto err;
}
- err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- ctx->report, GFP_KERNEL);
+ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
+ return;
err:
- if (err < 0) {
- nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- err);
- }
- return err;
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static int nf_tables_dump_tables(struct sk_buff *skb,
@@ -1050,16 +1046,15 @@ nla_put_failure:
return -1;
}
-static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
+static void nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
{
struct sk_buff *skb;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
@@ -1072,14 +1067,11 @@ static int nf_tables_chain_notify(const struct nft_ctx *ctx, int event)
goto err;
}
- err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- ctx->report, GFP_KERNEL);
+ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
+ return;
err:
- if (err < 0) {
- nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- err);
- }
- return err;
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static int nf_tables_dump_chains(struct sk_buff *skb,
@@ -1934,18 +1926,16 @@ nla_put_failure:
return -1;
}
-static int nf_tables_rule_notify(const struct nft_ctx *ctx,
- const struct nft_rule *rule,
- int event)
+static void nf_tables_rule_notify(const struct nft_ctx *ctx,
+ const struct nft_rule *rule, int event)
{
struct sk_buff *skb;
int err;
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
@@ -1958,14 +1948,11 @@ static int nf_tables_rule_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- ctx->report, GFP_KERNEL);
+ nfnetlink_send(skb, ctx->net, ctx->portid, NFNLGRP_NFTABLES,
+ ctx->report, GFP_KERNEL);
+ return;
err:
- if (err < 0) {
- nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES,
- err);
- }
- return err;
+ nfnetlink_set_err(ctx->net, ctx->portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
struct nft_rule_dump_ctx {
@@ -2696,9 +2683,9 @@ nla_put_failure:
return -1;
}
-static int nf_tables_set_notify(const struct nft_ctx *ctx,
- const struct nft_set *set,
- int event, gfp_t gfp_flags)
+static void nf_tables_set_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set, int event,
+ gfp_t gfp_flags)
{
struct sk_buff *skb;
u32 portid = ctx->portid;
@@ -2706,9 +2693,8 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
if (!ctx->report &&
!nfnetlink_has_listeners(ctx->net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, gfp_flags);
if (skb == NULL)
goto err;
@@ -2719,12 +2705,11 @@ static int nf_tables_set_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES,
- ctx->report, gfp_flags);
+ nfnetlink_send(skb, ctx->net, portid, NFNLGRP_NFTABLES, ctx->report,
+ gfp_flags);
+ return;
err:
- if (err < 0)
- nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, err);
- return err;
+ nfnetlink_set_err(ctx->net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static int nf_tables_dump_sets(struct sk_buff *skb, struct netlink_callback *cb)
@@ -3504,10 +3489,10 @@ nla_put_failure:
return -1;
}
-static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
- const struct nft_set *set,
- const struct nft_set_elem *elem,
- int event, u16 flags)
+static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
+ const struct nft_set *set,
+ const struct nft_set_elem *elem,
+ int event, u16 flags)
{
struct net *net = ctx->net;
u32 portid = ctx->portid;
@@ -3515,9 +3500,8 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
int err;
if (!ctx->report && !nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb == NULL)
goto err;
@@ -3529,12 +3513,11 @@ static int nf_tables_setelem_notify(const struct nft_ctx *ctx,
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
- GFP_KERNEL);
+ nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, ctx->report,
+ GFP_KERNEL);
+ return;
err:
- if (err < 0)
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
- return err;
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
@@ -4476,18 +4459,17 @@ static int nf_tables_delobj(struct net *net, struct sock *nlsk,
return nft_delobj(&ctx, obj);
}
-int nft_obj_notify(struct net *net, struct nft_table *table,
- struct nft_object *obj, u32 portid, u32 seq, int event,
- int family, int report, gfp_t gfp)
+void nft_obj_notify(struct net *net, struct nft_table *table,
+ struct nft_object *obj, u32 portid, u32 seq, int event,
+ int family, int report, gfp_t gfp)
{
struct sk_buff *skb;
int err;
if (!report &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb = nlmsg_new(NLMSG_GOODSIZE, gfp);
if (skb == NULL)
goto err;
@@ -4499,21 +4481,18 @@ int nft_obj_notify(struct net *net, struct nft_table *table,
goto err;
}
- err = nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
+ nfnetlink_send(skb, net, portid, NFNLGRP_NFTABLES, report, gfp);
+ return;
err:
- if (err < 0) {
- nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, err);
- }
- return err;
+ nfnetlink_set_err(net, portid, NFNLGRP_NFTABLES, -ENOBUFS);
}
EXPORT_SYMBOL_GPL(nft_obj_notify);
-static int nf_tables_obj_notify(const struct nft_ctx *ctx,
- struct nft_object *obj, int event)
+static void nf_tables_obj_notify(const struct nft_ctx *ctx,
+ struct nft_object *obj, int event)
{
- return nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid,
- ctx->seq, event, ctx->afi->family, ctx->report,
- GFP_KERNEL);
+ nft_obj_notify(ctx->net, ctx->table, obj, ctx->portid, ctx->seq, event,
+ ctx->afi->family, ctx->report, GFP_KERNEL);
}
static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net,
@@ -4543,7 +4522,8 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
+static void nf_tables_gen_notify(struct net *net, struct sk_buff *skb,
+ int event)
{
struct nlmsghdr *nlh = nlmsg_hdr(skb);
struct sk_buff *skb2;
@@ -4551,9 +4531,8 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
if (nlmsg_report(nlh) &&
!nfnetlink_has_listeners(net, NFNLGRP_NFTABLES))
- return 0;
+ return;
- err = -ENOBUFS;
skb2 = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
if (skb2 == NULL)
goto err;
@@ -4565,14 +4544,12 @@ static int nf_tables_gen_notify(struct net *net, struct sk_buff *skb, int event)
goto err;
}
- err = nfnetlink_send(skb2, net, NETLINK_CB(skb).portid,
- NFNLGRP_NFTABLES, nlmsg_report(nlh), GFP_KERNEL);
+ nfnetlink_send(skb2, net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ nlmsg_report(nlh), GFP_KERNEL);
+ return;
err:
- if (err < 0) {
- nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
- err);
- }
- return err;
+ nfnetlink_set_err(net, NETLINK_CB(skb).portid, NFNLGRP_NFTABLES,
+ -ENOBUFS);
}
static int nf_tables_getgen(struct net *net, struct sock *nlsk,
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 71e8fb886a73..78dfbf9588b3 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -60,11 +60,10 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
d = memcmp(this, key, set->klen);
if (d < 0) {
parent = parent->rb_left;
- /* In case of adjacent ranges, we always see the high
- * part of the range in first place, before the low one.
- * So don't update interval if the keys are equal.
- */
- if (interval && nft_rbtree_equal(set, this, interval))
+ if (interval &&
+ nft_rbtree_equal(set, this, interval) &&
+ nft_rbtree_interval_end(this) &&
+ !nft_rbtree_interval_end(interval))
continue;
interval = rbe;
} else if (d > 0)
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 16477df45b3b..3d705c688a27 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -13,6 +13,8 @@
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/file.h>
+#include <linux/cred.h>
+
#include <net/sock.h>
#include <net/inet_sock.h>
#include <linux/netfilter/x_tables.h>
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index ed212ffc1d9d..4bbf4526b885 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -17,7 +17,7 @@
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/sockios.h>
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index b9edf5fae6ae..879885b31cce 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -21,6 +21,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/nfc.h>
+#include <linux/sched/signal.h>
#include "nfc.h"
#include "llcp.h"
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b1beb2b94ec7..c82301ce3fff 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -796,9 +796,8 @@ static void ovs_fragment(struct net *net, struct vport *vport,
unsigned long orig_dst;
struct rt6_info ovs_rt;
- if (!v6ops) {
+ if (!v6ops)
goto err;
- }
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 85cd59526670..e0a87776a010 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -485,7 +485,6 @@ static int handle_fragments(struct net *net, struct sw_flow_key *key,
} else if (key->eth.type == htons(ETH_P_IPV6)) {
enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
- skb_orphan(skb);
memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
err = nf_ct_frag6_gather(net, skb, user);
if (err) {
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 2bd0d1949312..a0dbe7ca8f72 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3103,7 +3103,7 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
int addr_len)
{
struct sock *sk = sock->sk;
- char name[15];
+ char name[sizeof(uaddr->sa_data) + 1];
/*
* Check legality
@@ -3111,7 +3111,11 @@ static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
if (addr_len != sizeof(struct sockaddr))
return -EINVAL;
- strlcpy(name, uaddr->sa_data, sizeof(name));
+ /* uaddr->sa_data comes from the userspace, it's not guaranteed to be
+ * zero-terminated.
+ */
+ memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data));
+ name[sizeof(uaddr->sa_data)] = 0;
return packet_do_bind(sk, name, 0, pkt_sk(sk)->num);
}
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 8bad5624a27a..222bedcd9575 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -23,6 +23,7 @@
*/
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <net/sock.h>
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index ffd5f2297584..a6c8da3ee893 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -27,6 +27,8 @@
#include <linux/kernel.h>
#include <linux/net.h>
#include <linux/poll.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/tcp_states.h>
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 91fe46f1e4cc..7a64c8db81ab 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -45,8 +45,8 @@
#include "ib.h"
#include "ib_mr.h"
-unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
-unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
+static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
+static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
module_param(rds_ib_mr_1m_pool_size, int, 0444);
@@ -438,16 +438,12 @@ int rds_ib_init(void)
if (ret)
goto out_sysctl;
- ret = rds_trans_register(&rds_ib_transport);
- if (ret)
- goto out_recv;
+ rds_trans_register(&rds_ib_transport);
rds_info_register_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
goto out;
-out_recv:
- rds_ib_recv_exit();
out_sysctl:
rds_ib_sysctl_exit();
out_ibreg:
diff --git a/net/rds/ib_mr.h b/net/rds/ib_mr.h
index 24c086db4511..5d6e98a79a5e 100644
--- a/net/rds/ib_mr.h
+++ b/net/rds/ib_mr.h
@@ -107,8 +107,6 @@ struct rds_ib_mr_pool {
};
extern struct workqueue_struct *rds_ib_mr_wq;
-extern unsigned int rds_ib_mr_1m_pool_size;
-extern unsigned int rds_ib_mr_8k_pool_size;
extern bool prefer_frmr;
struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_dev,
diff --git a/net/rds/page.c b/net/rds/page.c
index e2b5a5832d3d..7cc57e098ddb 100644
--- a/net/rds/page.c
+++ b/net/rds/page.c
@@ -45,35 +45,6 @@ struct rds_page_remainder {
static
DEFINE_PER_CPU_SHARED_ALIGNED(struct rds_page_remainder, rds_page_remainders);
-/*
- * returns 0 on success or -errno on failure.
- *
- * We don't have to worry about flush_dcache_page() as this only works
- * with private pages. If, say, we were to do directed receive to pinned
- * user pages we'd have to worry more about cache coherence. (Though
- * the flush_dcache_page() in get_user_pages() would probably be enough).
- */
-int rds_page_copy_user(struct page *page, unsigned long offset,
- void __user *ptr, unsigned long bytes,
- int to_user)
-{
- unsigned long ret;
- void *addr;
-
- addr = kmap(page);
- if (to_user) {
- rds_stats_add(s_copy_to_user, bytes);
- ret = copy_to_user(ptr, addr + offset, bytes);
- } else {
- rds_stats_add(s_copy_from_user, bytes);
- ret = copy_from_user(addr + offset, ptr, bytes);
- }
- kunmap(page);
-
- return ret ? -EFAULT : 0;
-}
-EXPORT_SYMBOL_GPL(rds_page_copy_user);
-
/**
* rds_page_remainder_alloc - build up regions of a message.
*
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 07fff73dd4f3..39518ef7af4d 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -798,13 +798,6 @@ static inline int rds_message_verify_checksum(const struct rds_header *hdr)
/* page.c */
int rds_page_remainder_alloc(struct scatterlist *scat, unsigned long bytes,
gfp_t gfp);
-int rds_page_copy_user(struct page *page, unsigned long offset,
- void __user *ptr, unsigned long bytes,
- int to_user);
-#define rds_page_copy_to_user(page, offset, ptr, bytes) \
- rds_page_copy_user(page, offset, ptr, bytes, 1)
-#define rds_page_copy_from_user(page, offset, ptr, bytes) \
- rds_page_copy_user(page, offset, ptr, bytes, 0)
void rds_page_exit(void);
/* recv.c */
@@ -910,7 +903,7 @@ void rds_connect_path_complete(struct rds_conn_path *conn, int curr);
void rds_connect_complete(struct rds_connection *conn);
/* transport.c */
-int rds_trans_register(struct rds_transport *trans);
+void rds_trans_register(struct rds_transport *trans);
void rds_trans_unregister(struct rds_transport *trans);
struct rds_transport *rds_trans_get_preferred(struct net *net, __be32 addr);
void rds_trans_put(struct rds_transport *trans);
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 5438f6725092..a973d3b4dff0 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -652,16 +652,12 @@ static int rds_tcp_init(void)
if (ret)
goto out_pernet;
- ret = rds_trans_register(&rds_tcp_transport);
- if (ret)
- goto out_recv;
+ rds_trans_register(&rds_tcp_transport);
rds_info_register_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
goto out;
-out_recv:
- rds_tcp_recv_exit();
out_pernet:
unregister_pernet_subsys(&rds_tcp_net_ops);
out_notifier:
diff --git a/net/rds/transport.c b/net/rds/transport.c
index 2ffd3e30c643..0b188dd0a344 100644
--- a/net/rds/transport.c
+++ b/net/rds/transport.c
@@ -40,7 +40,7 @@
static struct rds_transport *transports[RDS_TRANS_COUNT];
static DECLARE_RWSEM(rds_trans_sem);
-int rds_trans_register(struct rds_transport *trans)
+void rds_trans_register(struct rds_transport *trans)
{
BUG_ON(strlen(trans->t_name) + 1 > TRANSNAMSIZ);
@@ -55,8 +55,6 @@ int rds_trans_register(struct rds_transport *trans)
}
up_write(&rds_trans_sem);
-
- return 0;
}
EXPORT_SYMBOL_GPL(rds_trans_register);
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 9ad301c46b88..b8a1df2c9785 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -20,7 +20,7 @@
#include <linux/in.h>
#include <linux/slab.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/string.h>
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 199b46e93e64..7fb59c3f1542 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -290,10 +290,11 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.exclusive = false;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+ /* The socket has been unlocked. */
if (!IS_ERR(call))
call->notify_rx = notify_rx;
- release_sock(&rx->sk);
+ mutex_unlock(&call->user_mutex);
_leave(" = %p", call);
return call;
}
@@ -310,7 +311,10 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call);
void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
+
+ mutex_lock(&call->user_mutex);
rxrpc_release_call(rxrpc_sk(sock->sk), call);
+ mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_kernel);
}
EXPORT_SYMBOL(rxrpc_kernel_end_call);
@@ -450,14 +454,16 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
case RXRPC_SERVER_BOUND:
case RXRPC_SERVER_LISTENING:
ret = rxrpc_do_sendmsg(rx, m, len);
- break;
+ /* The socket has been unlocked */
+ goto out;
default:
ret = -EINVAL;
- break;
+ goto error_unlock;
}
error_unlock:
release_sock(&rx->sk);
+out:
_leave(" = %d", ret);
return ret;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 12be432be9b2..26a7b1db1361 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -467,6 +467,7 @@ struct rxrpc_call {
struct rxrpc_connection *conn; /* connection carrying call */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_sock __rcu *socket; /* socket responsible */
+ struct mutex user_mutex; /* User access mutex */
ktime_t ack_at; /* When deferred ACK needs to happen */
ktime_t resend_at; /* When next resend needs to happen */
ktime_t ping_at; /* When next to send a ping */
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 7c4c64ab8da2..0ed181f53f32 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -323,6 +323,8 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
*
* If we want to report an error, we mark the skb with the packet type and
* abort code and return NULL.
+ *
+ * The call is returned with the user access mutex held.
*/
struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
struct rxrpc_connection *conn,
@@ -371,6 +373,18 @@ found_service:
trace_rxrpc_receive(call, rxrpc_receive_incoming,
sp->hdr.serial, sp->hdr.seq);
+ /* Lock the call to prevent rxrpc_kernel_send/recv_data() and
+ * sendmsg()/recvmsg() inconveniently stealing the mutex once the
+ * notification is generated.
+ *
+ * The BUG should never happen because the kernel should be well
+ * behaved enough not to access the call before the first notification
+ * event and userspace is prevented from doing so until the state is
+ * appropriate.
+ */
+ if (!mutex_trylock(&call->user_mutex))
+ BUG();
+
/* Make the call live. */
rxrpc_incoming_call(rx, call, skb);
conn = call->conn;
@@ -429,10 +443,12 @@ out:
/*
* handle acceptance of a call by userspace
* - assign the user call ID to the call at the front of the queue
+ * - called with the socket locked.
*/
struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
unsigned long user_call_ID,
rxrpc_notify_rx_t notify_rx)
+ __releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call;
struct rb_node *parent, **pp;
@@ -446,6 +462,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
if (list_empty(&rx->to_be_accepted)) {
write_unlock(&rx->call_lock);
+ release_sock(&rx->sk);
kleave(" = -ENODATA [empty]");
return ERR_PTR(-ENODATA);
}
@@ -470,10 +487,39 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
*/
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
+ write_unlock(&rx->call_lock);
+
+ /* We need to gain the mutex from the interrupt handler without
+ * upsetting lockdep, so we have to release it there and take it here.
+ * We are, however, still holding the socket lock, so other accepts
+ * must wait for us and no one can add the user ID behind our backs.
+ */
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ release_sock(&rx->sk);
+ kleave(" = -ERESTARTSYS");
+ return ERR_PTR(-ERESTARTSYS);
+ }
+
+ write_lock(&rx->call_lock);
list_del_init(&call->accept_link);
sk_acceptq_removed(&rx->sk);
rxrpc_see_call(call);
+ /* Find the user ID insertion point. */
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ call = rb_entry(parent, struct rxrpc_call, sock_node);
+
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ BUG();
+ }
+
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
@@ -499,6 +545,7 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
write_unlock(&rx->call_lock);
rxrpc_notify_socket(call);
rxrpc_service_prealloc(rx, GFP_KERNEL);
+ release_sock(&rx->sk);
_leave(" = %p{%d}", call, call->debug_id);
return call;
@@ -515,6 +562,7 @@ id_in_use:
write_unlock(&rx->call_lock);
out:
rxrpc_service_prealloc(rx, GFP_KERNEL);
+ release_sock(&rx->sk);
_leave(" = %d", ret);
return ERR_PTR(ret);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 8b94db3c9b2e..d79cd36987a9 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -115,6 +115,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
if (!call->rxtx_annotations)
goto nomem_2;
+ mutex_init(&call->user_mutex);
setup_timer(&call->timer, rxrpc_call_timer_expired,
(unsigned long)call);
INIT_WORK(&call->processor, &rxrpc_process_call);
@@ -194,14 +195,16 @@ static void rxrpc_start_call_timer(struct rxrpc_call *call)
}
/*
- * set up a call for the given data
- * - called in process context with IRQs enabled
+ * Set up a call for the given parameters.
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
*/
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
struct rxrpc_conn_parameters *cp,
struct sockaddr_rxrpc *srx,
unsigned long user_call_ID,
gfp_t gfp)
+ __releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_call *call, *xcall;
struct rb_node *parent, **pp;
@@ -212,6 +215,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
call = rxrpc_alloc_client_call(srx, gfp);
if (IS_ERR(call)) {
+ release_sock(&rx->sk);
_leave(" = %ld", PTR_ERR(call));
return call;
}
@@ -219,6 +223,11 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
here, (const void *)user_call_ID);
+ /* We need to protect a partially set up call against the user as we
+ * will be acting outside the socket lock.
+ */
+ mutex_lock(&call->user_mutex);
+
/* Publish the call, even though it is incompletely set up as yet */
write_lock(&rx->call_lock);
@@ -250,6 +259,9 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock(&rxrpc_call_lock);
+ /* From this point on, the call is protected by its own lock. */
+ release_sock(&rx->sk);
+
/* Set up or get a connection record and set the protocol parameters,
* including channel number and call ID.
*/
@@ -279,6 +291,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
*/
error_dup_user_ID:
write_unlock(&rx->call_lock);
+ release_sock(&rx->sk);
ret = -EEXIST;
error:
@@ -287,6 +300,7 @@ error:
trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
here, ERR_PTR(ret));
rxrpc_release_call(rx, call);
+ mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
return ERR_PTR(ret);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 40a1ef2adeb4..c3be03e8d098 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -76,6 +76,8 @@
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/timer.h>
+#include <linux/sched/signal.h>
+
#include "ar-internal.h"
__read_mostly unsigned int rxrpc_max_client_connections = 1000;
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 78ec33477adf..9f4cfa25af7c 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1194,6 +1194,7 @@ void rxrpc_data_ready(struct sock *udp_sk)
goto reject_packet;
}
rxrpc_send_ping(call, skb, skew);
+ mutex_unlock(&call->user_mutex);
}
rxrpc_input_call_packet(call, skb, skew);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index f3a688e10843..6491ca46a03f 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -14,6 +14,8 @@
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/export.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -487,6 +489,20 @@ try_again:
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
+ /* We're going to drop the socket lock, so we need to lock the call
+ * against interference by sendmsg.
+ */
+ if (!mutex_trylock(&call->user_mutex)) {
+ ret = -EWOULDBLOCK;
+ if (flags & MSG_DONTWAIT)
+ goto error_requeue_call;
+ ret = -ERESTARTSYS;
+ if (mutex_lock_interruptible(&call->user_mutex) < 0)
+ goto error_requeue_call;
+ }
+
+ release_sock(&rx->sk);
+
if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
@@ -502,7 +518,7 @@ try_again:
&call->user_call_ID);
}
if (ret < 0)
- goto error;
+ goto error_unlock_call;
}
if (msg->msg_name) {
@@ -533,12 +549,12 @@ try_again:
}
if (ret < 0)
- goto error;
+ goto error_unlock_call;
if (call->state == RXRPC_CALL_COMPLETE) {
ret = rxrpc_recvmsg_term(call, msg);
if (ret < 0)
- goto error;
+ goto error_unlock_call;
if (!(flags & MSG_PEEK))
rxrpc_release_call(rx, call);
msg->msg_flags |= MSG_EOR;
@@ -551,8 +567,21 @@ try_again:
msg->msg_flags &= ~MSG_MORE;
ret = copied;
-error:
+error_unlock_call:
+ mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
+ return ret;
+
+error_requeue_call:
+ if (!(flags & MSG_PEEK)) {
+ write_lock_bh(&rx->recvmsg_lock);
+ list_add(&call->recvmsg_link, &rx->recvmsg_q);
+ write_unlock_bh(&rx->recvmsg_lock);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0, 0, 0, 0);
+ } else {
+ rxrpc_put_call(call, rxrpc_call_put);
+ }
error_no_call:
release_sock(&rx->sk);
trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
@@ -609,7 +638,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
iov.iov_len = size - *_offset;
iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
- lock_sock(sock->sk);
+ mutex_lock(&call->user_mutex);
switch (call->state) {
case RXRPC_CALL_CLIENT_RECV_REPLY:
@@ -648,7 +677,7 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
read_phase_complete:
ret = 1;
out:
- release_sock(sock->sk);
+ mutex_unlock(&call->user_mutex);
_leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
return ret;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index 0a6ef217aa8a..bc2d3dcff9de 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -15,6 +15,8 @@
#include <linux/gfp.h>
#include <linux/skbuff.h>
#include <linux/export.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
@@ -59,9 +61,12 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
}
trace_rxrpc_transmit(call, rxrpc_transmit_wait);
- release_sock(&rx->sk);
+ mutex_unlock(&call->user_mutex);
*timeo = schedule_timeout(*timeo);
- lock_sock(&rx->sk);
+ if (mutex_lock_interruptible(&call->user_mutex) < 0) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
}
remove_wait_queue(&call->waitq, &myself);
@@ -171,7 +176,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
/*
* send data through a socket
* - must be called in process context
- * - caller holds the socket locked
+ * - The caller holds the call user access mutex, but not the socket lock.
*/
static int rxrpc_send_data(struct rxrpc_sock *rx,
struct rxrpc_call *call,
@@ -437,10 +442,13 @@ static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
/*
* Create a new client call for sendmsg().
+ * - Called with the socket lock held, which it must release.
+ * - If it returns a call, the call's lock will need releasing by the caller.
*/
static struct rxrpc_call *
rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
unsigned long user_call_ID, bool exclusive)
+ __releases(&rx->sk.sk_lock.slock)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -450,8 +458,10 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
_enter("");
- if (!msg->msg_name)
+ if (!msg->msg_name) {
+ release_sock(&rx->sk);
return ERR_PTR(-EDESTADDRREQ);
+ }
key = rx->key;
if (key && !rx->key->payload.data[0])
@@ -464,6 +474,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
cp.exclusive = rx->exclusive | exclusive;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+ /* The socket is now unlocked */
_leave(" = %p\n", call);
return call;
@@ -475,6 +486,7 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
* - the socket may be either a client socket or a server socket
*/
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+ __releases(&rx->sk.sk_lock.slock)
{
enum rxrpc_command cmd;
struct rxrpc_call *call;
@@ -488,12 +500,14 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
&exclusive);
if (ret < 0)
- return ret;
+ goto error_release_sock;
if (cmd == RXRPC_CMD_ACCEPT) {
+ ret = -EINVAL;
if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
- return -EINVAL;
+ goto error_release_sock;
call = rxrpc_accept_call(rx, user_call_ID, NULL);
+ /* The socket is now unlocked. */
if (IS_ERR(call))
return PTR_ERR(call);
rxrpc_put_call(call, rxrpc_call_put);
@@ -502,12 +516,30 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
if (!call) {
+ ret = -EBADSLT;
if (cmd != RXRPC_CMD_SEND_DATA)
- return -EBADSLT;
+ goto error_release_sock;
call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
exclusive);
+ /* The socket is now unlocked... */
if (IS_ERR(call))
return PTR_ERR(call);
+ /* ... and we have the call lock. */
+ } else {
+ ret = -EBUSY;
+ if (call->state == RXRPC_CALL_UNINITIALISED ||
+ call->state == RXRPC_CALL_CLIENT_AWAIT_CONN ||
+ call->state == RXRPC_CALL_SERVER_PREALLOC ||
+ call->state == RXRPC_CALL_SERVER_SECURING ||
+ call->state == RXRPC_CALL_SERVER_ACCEPTING)
+ goto error_release_sock;
+
+ ret = mutex_lock_interruptible(&call->user_mutex);
+ release_sock(&rx->sk);
+ if (ret < 0) {
+ ret = -ERESTARTSYS;
+ goto error_put;
+ }
}
_debug("CALL %d USR %lx ST %d on CONN %p",
@@ -535,9 +567,15 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
ret = rxrpc_send_data(rx, call, msg, len);
}
+ mutex_unlock(&call->user_mutex);
+error_put:
rxrpc_put_call(call, rxrpc_call_put);
_leave(" = %d", ret);
return ret;
+
+error_release_sock:
+ release_sock(&rx->sk);
+ return ret;
}
/**
@@ -562,7 +600,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
ASSERTCMP(msg->msg_name, ==, NULL);
ASSERTCMP(msg->msg_control, ==, NULL);
- lock_sock(sock->sk);
+ mutex_lock(&call->user_mutex);
_debug("CALL %d USR %lx ST %d on CONN %p",
call->debug_id, call->user_call_ID, call->state, call->conn);
@@ -577,7 +615,7 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
}
- release_sock(sock->sk);
+ mutex_unlock(&call->user_mutex);
_leave(" = %d", ret);
return ret;
}
@@ -598,12 +636,12 @@ void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
{
_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
- lock_sock(sock->sk);
+ mutex_lock(&call->user_mutex);
if (rxrpc_abort_call(why, call, 0, abort_code, error))
rxrpc_send_abort_packet(call);
- release_sock(sock->sk);
+ mutex_unlock(&call->user_mutex);
_leave("");
}
diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c
index 41c80b6c3906..ae7e4f5b348b 100644
--- a/net/sched/em_meta.c
+++ b/net/sched/em_meta.c
@@ -63,6 +63,7 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
#include <linux/string.h>
#include <linux/skbuff.h>
#include <linux/random.h>
diff --git a/net/sctp/input.c b/net/sctp/input.c
index fc458968fe4b..2a28ab20487f 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -884,14 +884,17 @@ int sctp_hash_transport(struct sctp_transport *t)
arg.paddr = &t->ipaddr;
arg.lport = htons(t->asoc->base.bind_addr.port);
+ rcu_read_lock();
list = rhltable_lookup(&sctp_transport_hashtable, &arg,
sctp_hash_params);
rhl_for_each_entry_rcu(transport, tmp, list, node)
if (transport->asoc->ep == t->asoc->ep) {
+ rcu_read_unlock();
err = -EEXIST;
goto out;
}
+ rcu_read_unlock();
err = rhltable_insert_key(&sctp_transport_hashtable, &arg,
&t->node, sctp_hash_params);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 465a9c8464f9..6f0a9be50f50 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -57,6 +57,7 @@
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/time.h>
+#include <linux/sched/signal.h>
#include <linux/ip.h>
#include <linux/capability.h>
#include <linux/fcntl.h>
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 5d4208ad029e..85837ab90e89 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -27,6 +27,8 @@
#include <linux/inetdevice.h>
#include <linux/workqueue.h>
#include <linux/in.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/tcp.h>
#include <net/smc.h>
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index cc6b6f8651eb..e41f594a1e1d 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -11,6 +11,8 @@
#include <linux/in.h>
#include <linux/if_ether.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include <net/tcp.h>
diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c
index 03dfcc6b7661..67a71d170bed 100644
--- a/net/smc/smc_close.c
+++ b/net/smc/smc_close.c
@@ -9,6 +9,8 @@
*/
#include <linux/workqueue.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include "smc.h"
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 5d1878732f46..c4ef9a4ec569 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -11,6 +11,8 @@
#include <linux/net.h>
#include <linux/rcupdate.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include "smc.h"
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 6e73b28915ea..69a0013dd25c 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -15,6 +15,8 @@
#include <linux/net.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
+#include <linux/sched/signal.h>
+
#include <net/sock.h>
#include "smc.h"
diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c
index 41adf362936d..b5c279b22680 100644
--- a/net/strparser/strparser.c
+++ b/net/strparser/strparser.c
@@ -504,6 +504,7 @@ static int __init strp_mod_init(void)
static void __exit strp_mod_exit(void)
{
+ destroy_workqueue(strp_wq);
}
module_init(strp_mod_init);
module_exit(strp_mod_exit);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 2bff63a73cf8..d2623b9f23d6 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -8,6 +8,7 @@
#include <linux/types.h>
#include <linux/sched.h>
+#include <linux/cred.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/errno.h>
@@ -464,8 +465,10 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
* Note that the cred_unused list must be time-ordered.
*/
if (time_in_range(cred->cr_expire, expired, jiffies) &&
- test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
+ test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
+ freed = SHRINK_STOP;
break;
+ }
list_del_init(&cred->cr_lru);
number_cred_unused--;
@@ -520,7 +523,7 @@ static unsigned long
rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
{
- return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+ return number_cred_unused * sysctl_vfs_cache_pressure / 100;
}
static void
@@ -646,9 +649,6 @@ rpcauth_init_cred(struct rpc_cred *cred, const struct auth_cred *acred,
cred->cr_auth = auth;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- cred->cr_magic = RPCAUTH_CRED_MAGIC;
-#endif
cred->cr_uid = acred->uid;
}
EXPORT_SYMBOL_GPL(rpcauth_init_cred);
@@ -876,8 +876,12 @@ int __init rpcauth_init_module(void)
err = rpc_init_generic_auth();
if (err < 0)
goto out2;
- register_shrinker(&rpc_cred_shrinker);
+ err = register_shrinker(&rpc_cred_shrinker);
+ if (err < 0)
+ goto out3;
return 0;
+out3:
+ rpc_destroy_generic_auth();
out2:
rpc_destroy_authunix();
out1:
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 153082598522..a54a7a3d28f5 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1489,8 +1489,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
case RPC_GSS_PROC_DESTROY:
if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
- rsci->h.expiry_time = seconds_since_boot();
- set_bit(CACHE_NEGATIVE, &rsci->h.flags);
+ /* Delete the entry from the cache_list and call cache_put */
+ sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
if (resv->iov_len + 4 > PAGE_SIZE)
goto drop;
svc_putnl(resv, RPC_SUCCESS);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 4d17376b2acb..5f3d527dff65 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -139,7 +139,4 @@ struct rpc_cred null_cred = {
.cr_ops = &null_credops,
.cr_count = ATOMIC_INIT(1),
.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
- .cr_magic = RPCAUTH_CRED_MAGIC,
-#endif
};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 306fc0f54596..82337e1ec9cd 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -14,12 +14,10 @@
#include <linux/sunrpc/auth.h>
#include <linux/user_namespace.h>
-#define NFS_NGROUPS 16
-
struct unx_cred {
struct rpc_cred uc_base;
kgid_t uc_gid;
- kgid_t uc_gids[NFS_NGROUPS];
+ kgid_t uc_gids[UNX_NGROUPS];
};
#define uc_uid uc_base.cr_uid
@@ -82,13 +80,13 @@ unx_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags, gfp_t
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
+ if (groups > UNX_NGROUPS)
+ groups = UNX_NGROUPS;
cred->uc_gid = acred->gid;
for (i = 0; i < groups; i++)
cred->uc_gids[i] = acred->group_info->gid[i];
- if (i < NFS_NGROUPS)
+ if (i < UNX_NGROUPS)
cred->uc_gids[i] = INVALID_GID;
return &cred->uc_base;
@@ -132,12 +130,12 @@ unx_match(struct auth_cred *acred, struct rpc_cred *rcred, int flags)
if (acred->group_info != NULL)
groups = acred->group_info->ngroups;
- if (groups > NFS_NGROUPS)
- groups = NFS_NGROUPS;
+ if (groups > UNX_NGROUPS)
+ groups = UNX_NGROUPS;
for (i = 0; i < groups ; i++)
if (!gid_eq(cred->uc_gids[i], acred->group_info->gid[i]))
return 0;
- if (groups < NFS_NGROUPS && gid_valid(cred->uc_gids[groups]))
+ if (groups < UNX_NGROUPS && gid_valid(cred->uc_gids[groups]))
return 0;
return 1;
}
@@ -166,7 +164,7 @@ unx_marshal(struct rpc_task *task, __be32 *p)
*p++ = htonl((u32) from_kuid(&init_user_ns, cred->uc_uid));
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gid));
hold = p++;
- for (i = 0; i < 16 && gid_valid(cred->uc_gids[i]); i++)
+ for (i = 0; i < UNX_NGROUPS && gid_valid(cred->uc_gids[i]); i++)
*p++ = htonl((u32) from_kgid(&init_user_ns, cred->uc_gids[i]));
*hold = htonl(p - hold - 1); /* gid array length */
*base = htonl((p - base - 1) << 2); /* cred length */
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index f39e3e11f9aa..79d55d949d9a 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -362,11 +362,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
cache_purge(cd);
spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock);
- if (cd->entries) {
- write_unlock(&cd->hash_lock);
- spin_unlock(&cache_list_lock);
- goto out;
- }
if (current_detail == cd)
current_detail = NULL;
list_del_init(&cd->others);
@@ -376,9 +371,6 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
/* module must be being unloaded so its safe to kill the worker */
cancel_delayed_work_sync(&cache_cleaner);
}
- return;
-out:
- printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
@@ -497,13 +489,32 @@ EXPORT_SYMBOL_GPL(cache_flush);
void cache_purge(struct cache_detail *detail)
{
- time_t now = seconds_since_boot();
- if (detail->flush_time >= now)
- now = detail->flush_time + 1;
- /* 'now' is the maximum value any 'last_refresh' can have */
- detail->flush_time = now;
- detail->nextcheck = seconds_since_boot();
- cache_flush();
+ struct cache_head *ch = NULL;
+ struct hlist_head *head = NULL;
+ struct hlist_node *tmp = NULL;
+ int i = 0;
+
+ write_lock(&detail->hash_lock);
+ if (!detail->entries) {
+ write_unlock(&detail->hash_lock);
+ return;
+ }
+
+ dprintk("RPC: %d entries in %s cache\n", detail->entries, detail->name);
+ for (i = 0; i < detail->hash_size; i++) {
+ head = &detail->hash_table[i];
+ hlist_for_each_entry_safe(ch, tmp, head, cache_list) {
+ hlist_del_init(&ch->cache_list);
+ detail->entries--;
+
+ set_bit(CACHE_CLEANED, &ch->flags);
+ write_unlock(&detail->hash_lock);
+ cache_fresh_unlocked(ch, detail);
+ cache_put(ch, detail);
+ write_lock(&detail->hash_lock);
+ }
+ }
+ write_unlock(&detail->hash_lock);
}
EXPORT_SYMBOL_GPL(cache_purge);
@@ -717,7 +728,7 @@ void cache_clean_deferred(void *owner)
/*
* communicate with user-space
*
- * We have a magic /proc file - /proc/sunrpc/<cachename>/channel.
+ * We have a magic /proc file - /proc/net/rpc/<cachename>/channel.
* On read, you get a full request, or block.
* On write, an update request is processed.
* Poll works if anything to read, and always allows write.
@@ -1272,7 +1283,7 @@ EXPORT_SYMBOL_GPL(qword_get);
/*
- * support /proc/sunrpc/cache/$CACHENAME/content
+ * support /proc/net/rpc/$CACHENAME/content
* as a seqfile.
* We call ->cache_show passing NULL for the item to
* get a header, then pass each real item in the cache
@@ -1427,20 +1438,11 @@ static ssize_t read_flush(struct file *file, char __user *buf,
struct cache_detail *cd)
{
char tbuf[22];
- unsigned long p = *ppos;
size_t len;
- snprintf(tbuf, sizeof(tbuf), "%lu\n", convert_to_wallclock(cd->flush_time));
- len = strlen(tbuf);
- if (p >= len)
- return 0;
- len -= p;
- if (len > count)
- len = count;
- if (copy_to_user(buf, (void*)(tbuf+p), len))
- return -EFAULT;
- *ppos += len;
- return len;
+ len = snprintf(tbuf, sizeof(tbuf), "%lu\n",
+ convert_to_wallclock(cd->flush_time));
+ return simple_read_from_buffer(buf, count, ppos, tbuf, len);
}
static ssize_t write_flush(struct file *file, const char __user *buf,
@@ -1600,21 +1602,12 @@ static const struct file_operations cache_flush_operations_procfs = {
.llseek = no_llseek,
};
-static void remove_cache_proc_entries(struct cache_detail *cd, struct net *net)
+static void remove_cache_proc_entries(struct cache_detail *cd)
{
- struct sunrpc_net *sn;
-
- if (cd->u.procfs.proc_ent == NULL)
- return;
- if (cd->u.procfs.flush_ent)
- remove_proc_entry("flush", cd->u.procfs.proc_ent);
- if (cd->u.procfs.channel_ent)
- remove_proc_entry("channel", cd->u.procfs.proc_ent);
- if (cd->u.procfs.content_ent)
- remove_proc_entry("content", cd->u.procfs.proc_ent);
- cd->u.procfs.proc_ent = NULL;
- sn = net_generic(net, sunrpc_net_id);
- remove_proc_entry(cd->name, sn->proc_net_rpc);
+ if (cd->procfs) {
+ proc_remove(cd->procfs);
+ cd->procfs = NULL;
+ }
}
#ifdef CONFIG_PROC_FS
@@ -1624,38 +1617,30 @@ static int create_cache_proc_entries(struct cache_detail *cd, struct net *net)
struct sunrpc_net *sn;
sn = net_generic(net, sunrpc_net_id);
- cd->u.procfs.proc_ent = proc_mkdir(cd->name, sn->proc_net_rpc);
- if (cd->u.procfs.proc_ent == NULL)
+ cd->procfs = proc_mkdir(cd->name, sn->proc_net_rpc);
+ if (cd->procfs == NULL)
goto out_nomem;
- cd->u.procfs.channel_ent = NULL;
- cd->u.procfs.content_ent = NULL;
p = proc_create_data("flush", S_IFREG|S_IRUSR|S_IWUSR,
- cd->u.procfs.proc_ent,
- &cache_flush_operations_procfs, cd);
- cd->u.procfs.flush_ent = p;
+ cd->procfs, &cache_flush_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
if (cd->cache_request || cd->cache_parse) {
p = proc_create_data("channel", S_IFREG|S_IRUSR|S_IWUSR,
- cd->u.procfs.proc_ent,
- &cache_file_operations_procfs, cd);
- cd->u.procfs.channel_ent = p;
+ cd->procfs, &cache_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
if (cd->cache_show) {
p = proc_create_data("content", S_IFREG|S_IRUSR,
- cd->u.procfs.proc_ent,
- &content_file_operations_procfs, cd);
- cd->u.procfs.content_ent = p;
+ cd->procfs, &content_file_operations_procfs, cd);
if (p == NULL)
goto out_nomem;
}
return 0;
out_nomem:
- remove_cache_proc_entries(cd, net);
+ remove_cache_proc_entries(cd);
return -ENOMEM;
}
#else /* CONFIG_PROC_FS */
@@ -1684,7 +1669,7 @@ EXPORT_SYMBOL_GPL(cache_register_net);
void cache_unregister_net(struct cache_detail *cd, struct net *net)
{
- remove_cache_proc_entries(cd, net);
+ remove_cache_proc_entries(cd);
sunrpc_destroy_cache_detail(cd);
}
EXPORT_SYMBOL_GPL(cache_unregister_net);
@@ -1843,15 +1828,29 @@ int sunrpc_cache_register_pipefs(struct dentry *parent,
struct dentry *dir = rpc_create_cache_dir(parent, name, umode, cd);
if (IS_ERR(dir))
return PTR_ERR(dir);
- cd->u.pipefs.dir = dir;
+ cd->pipefs = dir;
return 0;
}
EXPORT_SYMBOL_GPL(sunrpc_cache_register_pipefs);
void sunrpc_cache_unregister_pipefs(struct cache_detail *cd)
{
- rpc_remove_cache_dir(cd->u.pipefs.dir);
- cd->u.pipefs.dir = NULL;
+ if (cd->pipefs) {
+ rpc_remove_cache_dir(cd->pipefs);
+ cd->pipefs = NULL;
+ }
}
EXPORT_SYMBOL_GPL(sunrpc_cache_unregister_pipefs);
+void sunrpc_cache_unhash(struct cache_detail *cd, struct cache_head *h)
+{
+ write_lock(&cd->hash_lock);
+ if (!hlist_unhashed(&h->cache_list)){
+ hlist_del_init(&h->cache_list);
+ cd->entries--;
+ write_unlock(&cd->hash_lock);
+ cache_put(h, cd);
+ } else
+ write_unlock(&cd->hash_lock);
+}
+EXPORT_SYMBOL_GPL(sunrpc_cache_unhash);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 1dc9f3bac099..52da3ce54bb5 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1453,21 +1453,6 @@ size_t rpc_max_bc_payload(struct rpc_clnt *clnt)
EXPORT_SYMBOL_GPL(rpc_max_bc_payload);
/**
- * rpc_get_timeout - Get timeout for transport in units of HZ
- * @clnt: RPC client to query
- */
-unsigned long rpc_get_timeout(struct rpc_clnt *clnt)
-{
- unsigned long ret;
-
- rcu_read_lock();
- ret = rcu_dereference(clnt->cl_xprt)->timeout->to_initval;
- rcu_read_unlock();
- return ret;
-}
-EXPORT_SYMBOL_GPL(rpc_get_timeout);
-
-/**
* rpc_force_rebind - force transport to check that remote port is unchanged
* @clnt: client to rebind
*
@@ -2699,6 +2684,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
{
struct rpc_xprt_switch *xps;
struct rpc_xprt *xprt;
+ unsigned long connect_timeout;
unsigned long reconnect_timeout;
unsigned char resvport;
int ret = 0;
@@ -2711,6 +2697,7 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
return -EAGAIN;
}
resvport = xprt->resvport;
+ connect_timeout = xprt->connect_timeout;
reconnect_timeout = xprt->max_reconnect_timeout;
rcu_read_unlock();
@@ -2720,7 +2707,10 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
goto out_put_switch;
}
xprt->resvport = resvport;
- xprt->max_reconnect_timeout = reconnect_timeout;
+ if (xprt->ops->set_connect_timeout != NULL)
+ xprt->ops->set_connect_timeout(xprt,
+ connect_timeout,
+ reconnect_timeout);
rpc_xprt_switch_set_roundrobin(xps);
if (setup) {
@@ -2737,26 +2727,39 @@ out_put_switch:
}
EXPORT_SYMBOL_GPL(rpc_clnt_add_xprt);
+struct connect_timeout_data {
+ unsigned long connect_timeout;
+ unsigned long reconnect_timeout;
+};
+
static int
-rpc_xprt_cap_max_reconnect_timeout(struct rpc_clnt *clnt,
+rpc_xprt_set_connect_timeout(struct rpc_clnt *clnt,
struct rpc_xprt *xprt,
void *data)
{
- unsigned long timeout = *((unsigned long *)data);
+ struct connect_timeout_data *timeo = data;
- if (timeout < xprt->max_reconnect_timeout)
- xprt->max_reconnect_timeout = timeout;
+ if (xprt->ops->set_connect_timeout)
+ xprt->ops->set_connect_timeout(xprt,
+ timeo->connect_timeout,
+ timeo->reconnect_timeout);
return 0;
}
void
-rpc_cap_max_reconnect_timeout(struct rpc_clnt *clnt, unsigned long timeo)
+rpc_set_connect_timeout(struct rpc_clnt *clnt,
+ unsigned long connect_timeout,
+ unsigned long reconnect_timeout)
{
+ struct connect_timeout_data timeout = {
+ .connect_timeout = connect_timeout,
+ .reconnect_timeout = reconnect_timeout,
+ };
rpc_clnt_iterate_for_each_xprt(clnt,
- rpc_xprt_cap_max_reconnect_timeout,
- &timeo);
+ rpc_xprt_set_connect_timeout,
+ &timeout);
}
-EXPORT_SYMBOL_GPL(rpc_cap_max_reconnect_timeout);
+EXPORT_SYMBOL_GPL(rpc_set_connect_timeout);
void rpc_clnt_xprt_switch_put(struct rpc_clnt *clnt)
{
diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c
index e7b4d93566df..c8fd0b6c1618 100644
--- a/net/sunrpc/debugfs.c
+++ b/net/sunrpc/debugfs.c
@@ -16,11 +16,6 @@ static struct dentry *rpc_xprt_dir;
unsigned int rpc_inject_disconnect;
-struct rpc_clnt_iter {
- struct rpc_clnt *clnt;
- loff_t pos;
-};
-
static int
tasks_show(struct seq_file *f, void *v)
{
@@ -47,12 +42,10 @@ static void *
tasks_start(struct seq_file *f, loff_t *ppos)
__acquires(&clnt->cl_lock)
{
- struct rpc_clnt_iter *iter = f->private;
+ struct rpc_clnt *clnt = f->private;
loff_t pos = *ppos;
- struct rpc_clnt *clnt = iter->clnt;
struct rpc_task *task;
- iter->pos = pos + 1;
spin_lock(&clnt->cl_lock);
list_for_each_entry(task, &clnt->cl_tasks, tk_task)
if (pos-- == 0)
@@ -63,12 +56,10 @@ tasks_start(struct seq_file *f, loff_t *ppos)
static void *
tasks_next(struct seq_file *f, void *v, loff_t *pos)
{
- struct rpc_clnt_iter *iter = f->private;
- struct rpc_clnt *clnt = iter->clnt;
+ struct rpc_clnt *clnt = f->private;
struct rpc_task *task = v;
struct list_head *next = task->tk_task.next;
- ++iter->pos;
++*pos;
/* If there's another task on list, return it */
@@ -81,9 +72,7 @@ static void
tasks_stop(struct seq_file *f, void *v)
__releases(&clnt->cl_lock)
{
- struct rpc_clnt_iter *iter = f->private;
- struct rpc_clnt *clnt = iter->clnt;
-
+ struct rpc_clnt *clnt = f->private;
spin_unlock(&clnt->cl_lock);
}
@@ -96,17 +85,13 @@ static const struct seq_operations tasks_seq_operations = {
static int tasks_open(struct inode *inode, struct file *filp)
{
- int ret = seq_open_private(filp, &tasks_seq_operations,
- sizeof(struct rpc_clnt_iter));
-
+ int ret = seq_open(filp, &tasks_seq_operations);
if (!ret) {
struct seq_file *seq = filp->private_data;
- struct rpc_clnt_iter *iter = seq->private;
-
- iter->clnt = inode->i_private;
+ struct rpc_clnt *clnt = seq->private = inode->i_private;
- if (!atomic_inc_not_zero(&iter->clnt->cl_count)) {
- seq_release_private(inode, filp);
+ if (!atomic_inc_not_zero(&clnt->cl_count)) {
+ seq_release(inode, filp);
ret = -EINVAL;
}
}
@@ -118,10 +103,10 @@ static int
tasks_release(struct inode *inode, struct file *filp)
{
struct seq_file *seq = filp->private_data;
- struct rpc_clnt_iter *iter = seq->private;
+ struct rpc_clnt *clnt = seq->private;
- rpc_release_client(iter->clnt);
- return seq_release_private(inode, filp);
+ rpc_release_client(clnt);
+ return seq_release(inode, filp);
}
static const struct file_operations tasks_fops = {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2e22889a8837..a08aeb56b8e4 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -11,7 +11,7 @@
*/
#include <linux/linkage.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/net.h>
#include <linux/in.h>
@@ -385,7 +385,7 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
for (i = 0; i < progp->pg_nvers; i++) {
if (progp->pg_vers[i] == NULL)
continue;
- if (progp->pg_vers[i]->vs_hidden == 0)
+ if (!progp->pg_vers[i]->vs_hidden)
return 1;
}
}
@@ -976,6 +976,13 @@ int svc_register(const struct svc_serv *serv, struct net *net,
if (vers->vs_hidden)
continue;
+ /*
+ * Don't register a UDP port if we need congestion
+ * control.
+ */
+ if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP)
+ continue;
+
error = __svc_register(net, progp->pg_name, progp->pg_prog,
i, family, proto, port);
@@ -1169,6 +1176,21 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
!(versp = progp->pg_vers[vers]))
goto err_bad_vers;
+ /*
+ * Some protocol versions (namely NFSv4) require some form of
+ * congestion control. (See RFC 7530 section 3.1 paragraph 2)
+ * In other words, UDP is not allowed. We mark those when setting
+ * up the svc_xprt, and verify that here.
+ *
+ * The spec is not very clear about what error should be returned
+ * when someone tries to access a server that is listening on UDP
+ * for lower versions. RPC_PROG_MISMATCH seems to be the closest
+ * fit.
+ */
+ if (versp->vs_need_cong_ctrl &&
+ !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
+ goto err_bad_vers;
+
procp = versp->vs_proc + proc;
if (proc >= versp->vs_nproc || !procp->pc_func)
goto err_bad_proc;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 64af4f034de6..f81eaa8e0888 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -403,7 +403,7 @@ svcauth_unix_info_release(struct svc_xprt *xpt)
/****************************************************************************
* auth.unix.gid cache
* simple cache to map a UID to a list of GIDs
- * because AUTH_UNIX aka AUTH_SYS has a max of 16
+ * because AUTH_UNIX aka AUTH_SYS has a max of UNX_NGROUPS
*/
#define GID_HASHBITS 8
#define GID_HASHMAX (1<<GID_HASHBITS)
@@ -810,7 +810,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
cred->cr_uid = make_kuid(&init_user_ns, svc_getnl(argv)); /* uid */
cred->cr_gid = make_kgid(&init_user_ns, svc_getnl(argv)); /* gid */
slen = svc_getnl(argv); /* gids length */
- if (slen > 16 || (len -= (slen + 2)*4) < 0)
+ if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
goto badcred;
cred->cr_group_info = groups_alloc(slen);
if (cred->cr_group_info == NULL)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index d227d97f7ad4..8931e33b6541 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1306,6 +1306,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
&svsk->sk_xprt, serv);
set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
+ set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7f1071e103ca..1f7082144e01 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1518,3 +1518,37 @@ out:
}
EXPORT_SYMBOL_GPL(xdr_process_buf);
+/**
+ * xdr_stream_decode_string_dup - Decode and duplicate variable length string
+ * @xdr: pointer to xdr_stream
+ * @str: location to store pointer to string
+ * @maxlen: maximum acceptable string length
+ * @gfp_flags: GFP mask to use
+ *
+ * Return values:
+ * On success, returns length of NUL-terminated string stored in *@ptr
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the size of the string would exceed @maxlen
+ * %-ENOMEM on memory allocation failure
+ */
+ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
+ size_t maxlen, gfp_t gfp_flags)
+{
+ void *p;
+ ssize_t ret;
+
+ ret = xdr_stream_decode_opaque_inline(xdr, &p, maxlen);
+ if (ret > 0) {
+ char *s = kmalloc(ret + 1, gfp_flags);
+ if (s != NULL) {
+ memcpy(s, p, ret);
+ s[ret] = '\0';
+ *str = s;
+ return strlen(s);
+ }
+ ret = -ENOMEM;
+ }
+ *str = NULL;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 9a6be030ca7d..b530a2852ba8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -897,13 +897,11 @@ static void xprt_timer(struct rpc_task *task)
return;
dprintk("RPC: %5u xprt_timer\n", task->tk_pid);
- spin_lock_bh(&xprt->transport_lock);
if (!req->rq_reply_bytes_recvd) {
if (xprt->ops->timer)
xprt->ops->timer(xprt, task);
} else
task->tk_status = 0;
- spin_unlock_bh(&xprt->transport_lock);
}
/**
diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c
index 1ebb09e1ac4f..59e64025ed96 100644
--- a/net/sunrpc/xprtrdma/fmr_ops.c
+++ b/net/sunrpc/xprtrdma/fmr_ops.c
@@ -310,10 +310,7 @@ fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
struct rpcrdma_mw *mw;
while (!list_empty(&req->rl_registered)) {
- mw = list_first_entry(&req->rl_registered,
- struct rpcrdma_mw, mw_list);
- list_del_init(&mw->mw_list);
-
+ mw = rpcrdma_pop_mw(&req->rl_registered);
if (sync)
fmr_op_recover_mr(mw);
else
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 47bed5333c7f..f81dd93176c0 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -466,8 +466,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_rep *rep = req->rl_reply;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_mw *mw, *tmp;
struct rpcrdma_frmr *f;
+ struct rpcrdma_mw *mw;
int count, rc;
dprintk("RPC: %s: req %p\n", __func__, req);
@@ -534,10 +534,10 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
* them to the free MW list.
*/
unmap:
- list_for_each_entry_safe(mw, tmp, &req->rl_registered, mw_list) {
+ while (!list_empty(&req->rl_registered)) {
+ mw = rpcrdma_pop_mw(&req->rl_registered);
dprintk("RPC: %s: DMA unmapping frmr %p\n",
__func__, &mw->frmr);
- list_del_init(&mw->mw_list);
ib_dma_unmap_sg(ia->ri_device,
mw->mw_sg, mw->mw_nents, mw->mw_dir);
rpcrdma_put_mw(r_xprt, mw);
@@ -571,10 +571,7 @@ frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
struct rpcrdma_mw *mw;
while (!list_empty(&req->rl_registered)) {
- mw = list_first_entry(&req->rl_registered,
- struct rpcrdma_mw, mw_list);
- list_del_init(&mw->mw_list);
-
+ mw = rpcrdma_pop_mw(&req->rl_registered);
if (sync)
frwr_op_recover_mr(mw);
else
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index c52e0f2ffe52..a044be2d6ad7 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -125,14 +125,34 @@ void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt)
/* The client can send a request inline as long as the RPCRDMA header
* plus the RPC call fit under the transport's inline limit. If the
* combined call message size exceeds that limit, the client must use
- * the read chunk list for this operation.
+ * a Read chunk for this operation.
+ *
+ * A Read chunk is also required if sending the RPC call inline would
+ * exceed this device's max_sge limit.
*/
static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt,
struct rpc_rqst *rqst)
{
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct xdr_buf *xdr = &rqst->rq_snd_buf;
+ unsigned int count, remaining, offset;
+
+ if (xdr->len > r_xprt->rx_ia.ri_max_inline_write)
+ return false;
+
+ if (xdr->page_len) {
+ remaining = xdr->page_len;
+ offset = xdr->page_base & ~PAGE_MASK;
+ count = 0;
+ while (remaining) {
+ remaining -= min_t(unsigned int,
+ PAGE_SIZE - offset, remaining);
+ offset = 0;
+ if (++count > r_xprt->rx_ia.ri_max_send_sges)
+ return false;
+ }
+ }
- return rqst->rq_snd_buf.len <= ia->ri_max_inline_write;
+ return true;
}
/* The client can't know how large the actual reply will be. Thus it
@@ -186,9 +206,9 @@ rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n)
*/
static int
-rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
- enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
- bool reminv_expected)
+rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
+ unsigned int pos, enum rpcrdma_chunktype type,
+ struct rpcrdma_mr_seg *seg)
{
int len, n, p, page_base;
struct page **ppages;
@@ -226,22 +246,21 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
if (len && n == RPCRDMA_MAX_SEGS)
goto out_overflow;
- /* When encoding the read list, the tail is always sent inline */
- if (type == rpcrdma_readch)
+ /* When encoding a Read chunk, the tail iovec contains an
+ * XDR pad and may be omitted.
+ */
+ if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
return n;
- /* When encoding the Write list, some servers need to see an extra
- * segment for odd-length Write chunks. The upper layer provides
- * space in the tail iovec for this purpose.
+ /* When encoding a Write chunk, some servers need to see an
+ * extra segment for non-XDR-aligned Write chunks. The upper
+ * layer provides space in the tail iovec that may be used
+ * for this purpose.
*/
- if (type == rpcrdma_writech && reminv_expected)
+ if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
return n;
if (xdrbuf->tail[0].iov_len) {
- /* the rpcrdma protocol allows us to omit any trailing
- * xdr pad bytes, saving the server an RDMA operation. */
- if (xdrbuf->tail[0].iov_len < 4 && xprt_rdma_pad_optimize)
- return n;
n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n);
if (n == RPCRDMA_MAX_SEGS)
goto out_overflow;
@@ -293,7 +312,8 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
if (rtype == rpcrdma_areadch)
pos = 0;
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
+ rtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -302,7 +322,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt,
false, &mw);
if (n < 0)
return ERR_PTR(n);
- list_add(&mw->mw_list, &req->rl_registered);
+ rpcrdma_push_mw(mw, &req->rl_registered);
*iptr++ = xdr_one; /* item present */
@@ -355,10 +375,9 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
rqst->rq_rcv_buf.head[0].iov_len,
- wtype, seg,
- r_xprt->rx_ia.ri_reminv_expected);
+ wtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -371,7 +390,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
true, &mw);
if (n < 0)
return ERR_PTR(n);
- list_add(&mw->mw_list, &req->rl_registered);
+ rpcrdma_push_mw(mw, &req->rl_registered);
iptr = xdr_encode_rdma_segment(iptr, mw);
@@ -423,8 +442,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
}
seg = req->rl_segments;
- nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
- r_xprt->rx_ia.ri_reminv_expected);
+ nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
if (nsegs < 0)
return ERR_PTR(nsegs);
@@ -437,7 +455,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt,
true, &mw);
if (n < 0)
return ERR_PTR(n);
- list_add(&mw->mw_list, &req->rl_registered);
+ rpcrdma_push_mw(mw, &req->rl_registered);
iptr = xdr_encode_rdma_segment(iptr, mw);
@@ -741,13 +759,13 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
iptr = headerp->rm_body.rm_chunks;
iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype);
if (IS_ERR(iptr))
- goto out_unmap;
+ goto out_err;
iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype);
if (IS_ERR(iptr))
- goto out_unmap;
+ goto out_err;
iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype);
if (IS_ERR(iptr))
- goto out_unmap;
+ goto out_err;
hdrlen = (unsigned char *)iptr - (unsigned char *)headerp;
dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n",
@@ -758,12 +776,14 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen,
&rqst->rq_snd_buf, rtype)) {
iptr = ERR_PTR(-EIO);
- goto out_unmap;
+ goto out_err;
}
return 0;
-out_unmap:
- r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
+out_err:
+ pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
+ PTR_ERR(iptr));
+ r_xprt->rx_stats.failed_marshal_count++;
return PTR_ERR(iptr);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index cb1e48e54eb1..ff1df40f0d26 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -201,19 +201,20 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst)
{
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- struct rpcrdma_msg *headerp = (struct rpcrdma_msg *)rqst->rq_buffer;
+ __be32 *p;
int rc;
/* Space in the send buffer for an RPC/RDMA header is reserved
* via xprt->tsh_size.
*/
- headerp->rm_xid = rqst->rq_xid;
- headerp->rm_vers = rpcrdma_version;
- headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
- headerp->rm_type = rdma_msg;
- headerp->rm_body.rm_chunks[0] = xdr_zero;
- headerp->rm_body.rm_chunks[1] = xdr_zero;
- headerp->rm_body.rm_chunks[2] = xdr_zero;
+ p = rqst->rq_buffer;
+ *p++ = rqst->rq_xid;
+ *p++ = rpcrdma_version;
+ *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_max_requests);
+ *p++ = rdma_msg;
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
+ *p = xdr_zero;
#ifdef SVCRDMA_BACKCHANNEL_DEBUG
pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_marshal.c b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
index 0ba9887f3e22..1c4aabf0f657 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_marshal.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_marshal.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2016 Oracle. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -47,102 +48,43 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-/*
- * Decodes a read chunk list. The expected format is as follows:
- * descrim : xdr_one
- * position : __be32 offset into XDR stream
- * handle : __be32 RKEY
- * . . .
- * end-of-list: xdr_zero
- */
-static __be32 *decode_read_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_read_list(__be32 *p, __be32 *end)
{
- struct rpcrdma_read_chunk *ch = (struct rpcrdma_read_chunk *)va;
+ __be32 *next;
- while (ch->rc_discrim != xdr_zero) {
- if (((unsigned long)ch + sizeof(struct rpcrdma_read_chunk)) >
- (unsigned long)vaend) {
- dprintk("svcrdma: vaend=%p, ch=%p\n", vaend, ch);
+ while (*p++ != xdr_zero) {
+ next = p + rpcrdma_readchunk_maxsz - 1;
+ if (next > end)
return NULL;
- }
- ch++;
+ p = next;
}
- return &ch->rc_position;
+ return p;
}
-/*
- * Decodes a write chunk list. The expected format is as follows:
- * descrim : xdr_one
- * nchunks : <count>
- * handle : __be32 RKEY ---+
- * length : __be32 <len of segment> |
- * offset : remove va + <count>
- * . . . |
- * ---+
- */
-static __be32 *decode_write_list(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_write_list(__be32 *p, __be32 *end)
{
- unsigned long start, end;
- int nchunks;
-
- struct rpcrdma_write_array *ary =
- (struct rpcrdma_write_array *)va;
+ __be32 *next;
- /* Check for not write-array */
- if (ary->wc_discrim == xdr_zero)
- return &ary->wc_nchunks;
-
- if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
- (unsigned long)vaend) {
- dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
- return NULL;
- }
- nchunks = be32_to_cpu(ary->wc_nchunks);
-
- start = (unsigned long)&ary->wc_array[0];
- end = (unsigned long)vaend;
- if (nchunks < 0 ||
- nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
- (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
- dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
- ary, nchunks, vaend);
- return NULL;
+ while (*p++ != xdr_zero) {
+ next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+ if (next > end)
+ return NULL;
+ p = next;
}
- /*
- * rs_length is the 2nd 4B field in wc_target and taking its
- * address skips the list terminator
- */
- return &ary->wc_array[nchunks].wc_target.rs_length;
+ return p;
}
-static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
+static __be32 *xdr_check_reply_chunk(__be32 *p, __be32 *end)
{
- unsigned long start, end;
- int nchunks;
- struct rpcrdma_write_array *ary =
- (struct rpcrdma_write_array *)va;
-
- /* Check for no reply-array */
- if (ary->wc_discrim == xdr_zero)
- return &ary->wc_nchunks;
-
- if ((unsigned long)ary + sizeof(struct rpcrdma_write_array) >
- (unsigned long)vaend) {
- dprintk("svcrdma: ary=%p, vaend=%p\n", ary, vaend);
- return NULL;
- }
- nchunks = be32_to_cpu(ary->wc_nchunks);
-
- start = (unsigned long)&ary->wc_array[0];
- end = (unsigned long)vaend;
- if (nchunks < 0 ||
- nchunks > (SIZE_MAX - start) / sizeof(struct rpcrdma_write_chunk) ||
- (start + (sizeof(struct rpcrdma_write_chunk) * nchunks)) > end) {
- dprintk("svcrdma: ary=%p, wc_nchunks=%d, vaend=%p\n",
- ary, nchunks, vaend);
- return NULL;
+ __be32 *next;
+
+ if (*p++ != xdr_zero) {
+ next = p + 1 + be32_to_cpup(p) * rpcrdma_segment_maxsz;
+ if (next > end)
+ return NULL;
+ p = next;
}
- return (__be32 *)&ary->wc_array[nchunks];
+ return p;
}
/**
@@ -158,87 +100,71 @@ static __be32 *decode_reply_array(__be32 *va, __be32 *vaend)
*/
int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg)
{
- struct rpcrdma_msg *rmsgp;
- __be32 *va, *vaend;
- unsigned int len;
- u32 hdr_len;
+ __be32 *p, *end, *rdma_argp;
+ unsigned int hdr_len;
/* Verify that there's enough bytes for header + something */
- if (rq_arg->len <= RPCRDMA_HDRLEN_ERR) {
- dprintk("svcrdma: header too short = %d\n",
- rq_arg->len);
- return -EINVAL;
- }
+ if (rq_arg->len <= RPCRDMA_HDRLEN_ERR)
+ goto out_short;
- rmsgp = (struct rpcrdma_msg *)rq_arg->head[0].iov_base;
- if (rmsgp->rm_vers != rpcrdma_version) {
- dprintk("%s: bad version %u\n", __func__,
- be32_to_cpu(rmsgp->rm_vers));
- return -EPROTONOSUPPORT;
- }
+ rdma_argp = rq_arg->head[0].iov_base;
+ if (*(rdma_argp + 1) != rpcrdma_version)
+ goto out_version;
- switch (be32_to_cpu(rmsgp->rm_type)) {
- case RDMA_MSG:
- case RDMA_NOMSG:
+ switch (*(rdma_argp + 3)) {
+ case rdma_msg:
+ case rdma_nomsg:
break;
- case RDMA_DONE:
- /* Just drop it */
- dprintk("svcrdma: dropping RDMA_DONE message\n");
- return 0;
-
- case RDMA_ERROR:
- /* Possible if this is a backchannel reply.
- * XXX: We should cancel this XID, though.
- */
- dprintk("svcrdma: dropping RDMA_ERROR message\n");
- return 0;
-
- case RDMA_MSGP:
- /* Pull in the extra for the padded case, bump our pointer */
- rmsgp->rm_body.rm_padded.rm_align =
- be32_to_cpu(rmsgp->rm_body.rm_padded.rm_align);
- rmsgp->rm_body.rm_padded.rm_thresh =
- be32_to_cpu(rmsgp->rm_body.rm_padded.rm_thresh);
-
- va = &rmsgp->rm_body.rm_padded.rm_pempty[4];
- rq_arg->head[0].iov_base = va;
- len = (u32)((unsigned long)va - (unsigned long)rmsgp);
- rq_arg->head[0].iov_len -= len;
- if (len > rq_arg->len)
- return -EINVAL;
- return len;
- default:
- dprintk("svcrdma: bad rdma procedure (%u)\n",
- be32_to_cpu(rmsgp->rm_type));
- return -EINVAL;
- }
+ case rdma_done:
+ goto out_drop;
- /* The chunk list may contain either a read chunk list or a write
- * chunk list and a reply chunk list.
- */
- va = &rmsgp->rm_body.rm_chunks[0];
- vaend = (__be32 *)((unsigned long)rmsgp + rq_arg->len);
- va = decode_read_list(va, vaend);
- if (!va) {
- dprintk("svcrdma: failed to decode read list\n");
- return -EINVAL;
- }
- va = decode_write_list(va, vaend);
- if (!va) {
- dprintk("svcrdma: failed to decode write list\n");
- return -EINVAL;
- }
- va = decode_reply_array(va, vaend);
- if (!va) {
- dprintk("svcrdma: failed to decode reply chunk\n");
- return -EINVAL;
+ case rdma_error:
+ goto out_drop;
+
+ default:
+ goto out_proc;
}
- rq_arg->head[0].iov_base = va;
- hdr_len = (unsigned long)va - (unsigned long)rmsgp;
+ end = (__be32 *)((unsigned long)rdma_argp + rq_arg->len);
+ p = xdr_check_read_list(rdma_argp + 4, end);
+ if (!p)
+ goto out_inval;
+ p = xdr_check_write_list(p, end);
+ if (!p)
+ goto out_inval;
+ p = xdr_check_reply_chunk(p, end);
+ if (!p)
+ goto out_inval;
+ if (p > end)
+ goto out_inval;
+
+ rq_arg->head[0].iov_base = p;
+ hdr_len = (unsigned long)p - (unsigned long)rdma_argp;
rq_arg->head[0].iov_len -= hdr_len;
return hdr_len;
+
+out_short:
+ dprintk("svcrdma: header too short = %d\n", rq_arg->len);
+ return -EINVAL;
+
+out_version:
+ dprintk("svcrdma: bad xprt version: %u\n",
+ be32_to_cpup(rdma_argp + 1));
+ return -EPROTONOSUPPORT;
+
+out_drop:
+ dprintk("svcrdma: dropping RDMA_DONE/ERROR message\n");
+ return 0;
+
+out_proc:
+ dprintk("svcrdma: bad rdma procedure (%u)\n",
+ be32_to_cpup(rdma_argp + 3));
+ return -EINVAL;
+
+out_inval:
+ dprintk("svcrdma: failed to parse transport header\n");
+ return -EINVAL;
}
int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
@@ -249,7 +175,7 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
*va++ = rmsgp->rm_xid;
*va++ = rmsgp->rm_vers;
- *va++ = cpu_to_be32(xprt->sc_max_requests);
+ *va++ = xprt->sc_fc_credits;
*va++ = rdma_error;
*va++ = cpu_to_be32(err);
if (err == ERR_VERS) {
@@ -260,32 +186,35 @@ int svc_rdma_xdr_encode_error(struct svcxprt_rdma *xprt,
return (int)((unsigned long)va - (unsigned long)startp);
}
-int svc_rdma_xdr_get_reply_hdr_len(struct rpcrdma_msg *rmsgp)
+/**
+ * svc_rdma_xdr_get_reply_hdr_length - Get length of Reply transport header
+ * @rdma_resp: buffer containing Reply transport header
+ *
+ * Returns length of transport header, in bytes.
+ */
+unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp)
{
- struct rpcrdma_write_array *wr_ary;
+ unsigned int nsegs;
+ __be32 *p;
- /* There is no read-list in a reply */
+ p = rdma_resp;
- /* skip write list */
- wr_ary = (struct rpcrdma_write_array *)
- &rmsgp->rm_body.rm_chunks[1];
- if (wr_ary->wc_discrim)
- wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)].
- wc_target.rs_length;
- else
- wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_nchunks;
-
- /* skip reply array */
- if (wr_ary->wc_discrim)
- wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_array[be32_to_cpu(wr_ary->wc_nchunks)];
- else
- wr_ary = (struct rpcrdma_write_array *)
- &wr_ary->wc_nchunks;
-
- return (unsigned long) wr_ary - (unsigned long) rmsgp;
+ /* RPC-over-RDMA V1 replies never have a Read list. */
+ p += rpcrdma_fixed_maxsz + 1;
+
+ /* Skip Write list. */
+ while (*p++ != xdr_zero) {
+ nsegs = be32_to_cpup(p++);
+ p += nsegs * rpcrdma_segment_maxsz;
+ }
+
+ /* Skip Reply chunk. */
+ if (*p++ != xdr_zero) {
+ nsegs = be32_to_cpup(p++);
+ p += nsegs * rpcrdma_segment_maxsz;
+ }
+
+ return (unsigned long)p - (unsigned long)rdma_resp;
}
void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *rmsgp, int chunks)
@@ -326,19 +255,3 @@ void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *ary,
seg->rs_offset = rs_offset;
seg->rs_length = cpu_to_be32(write_len);
}
-
-void svc_rdma_xdr_encode_reply_header(struct svcxprt_rdma *xprt,
- struct rpcrdma_msg *rdma_argp,
- struct rpcrdma_msg *rdma_resp,
- enum rpcrdma_proc rdma_type)
-{
- rdma_resp->rm_xid = rdma_argp->rm_xid;
- rdma_resp->rm_vers = rdma_argp->rm_vers;
- rdma_resp->rm_credit = cpu_to_be32(xprt->sc_max_requests);
- rdma_resp->rm_type = cpu_to_be32(rdma_type);
-
- /* Encode <nul> chunks lists */
- rdma_resp->rm_body.rm_chunks[0] = xdr_zero;
- rdma_resp->rm_body.rm_chunks[1] = xdr_zero;
- rdma_resp->rm_body.rm_chunks[2] = xdr_zero;
-}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 172b537f8cfc..f7b2daf72a86 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -606,26 +606,24 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
dprintk("svcrdma: rqstp=%p\n", rqstp);
- spin_lock_bh(&rdma_xprt->sc_rq_dto_lock);
+ spin_lock(&rdma_xprt->sc_rq_dto_lock);
if (!list_empty(&rdma_xprt->sc_read_complete_q)) {
- ctxt = list_entry(rdma_xprt->sc_read_complete_q.next,
- struct svc_rdma_op_ctxt,
- dto_q);
- list_del_init(&ctxt->dto_q);
- spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+ ctxt = list_first_entry(&rdma_xprt->sc_read_complete_q,
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
+ spin_unlock(&rdma_xprt->sc_rq_dto_lock);
rdma_read_complete(rqstp, ctxt);
goto complete;
} else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
- ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
- struct svc_rdma_op_ctxt,
- dto_q);
- list_del_init(&ctxt->dto_q);
+ ctxt = list_first_entry(&rdma_xprt->sc_rq_dto_q,
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
} else {
atomic_inc(&rdma_stat_rq_starve);
clear_bit(XPT_DATA, &xprt->xpt_flags);
ctxt = NULL;
}
- spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
+ spin_unlock(&rdma_xprt->sc_rq_dto_lock);
if (!ctxt) {
/* This is the EAGAIN path. The svc_recv routine will
* return -EAGAIN, the nfsd thread will go to call into
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index ad4d286a83c5..515221b16d09 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -476,7 +476,8 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_pd->local_dma_lkey;
- ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
+ ctxt->sge[0].length =
+ svc_rdma_xdr_get_reply_hdr_len((__be32 *)rdma_resp);
ctxt->sge[0].addr =
ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
ctxt->sge[0].length, DMA_TO_DEVICE);
@@ -559,12 +560,12 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
struct rpcrdma_msg *rdma_argp;
struct rpcrdma_msg *rdma_resp;
struct rpcrdma_write_array *wr_ary, *rp_ary;
- enum rpcrdma_proc reply_type;
int ret;
int inline_bytes;
struct page *res_page;
struct svc_rdma_req_map *vec;
u32 inv_rkey;
+ __be32 *p;
dprintk("svcrdma: sending response for rqstp=%p\n", rqstp);
@@ -596,12 +597,17 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!res_page)
goto err0;
rdma_resp = page_address(res_page);
- if (rp_ary)
- reply_type = RDMA_NOMSG;
- else
- reply_type = RDMA_MSG;
- svc_rdma_xdr_encode_reply_header(rdma, rdma_argp,
- rdma_resp, reply_type);
+
+ p = &rdma_resp->rm_xid;
+ *p++ = rdma_argp->rm_xid;
+ *p++ = rdma_argp->rm_vers;
+ *p++ = rdma->sc_fc_credits;
+ *p++ = rp_ary ? rdma_nomsg : rdma_msg;
+
+ /* Start with empty chunks */
+ *p++ = xdr_zero;
+ *p++ = xdr_zero;
+ *p = xdr_zero;
/* Send any write-chunk data and build resp write-list */
if (wr_ary) {
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 39652d390a9c..c13a5c35ce14 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -157,8 +157,7 @@ static struct svc_rdma_op_ctxt *alloc_ctxt(struct svcxprt_rdma *xprt,
ctxt = kmalloc(sizeof(*ctxt), flags);
if (ctxt) {
ctxt->xprt = xprt;
- INIT_LIST_HEAD(&ctxt->free);
- INIT_LIST_HEAD(&ctxt->dto_q);
+ INIT_LIST_HEAD(&ctxt->list);
}
return ctxt;
}
@@ -180,7 +179,7 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
dprintk("svcrdma: No memory for RDMA ctxt\n");
return false;
}
- list_add(&ctxt->free, &xprt->sc_ctxts);
+ list_add(&ctxt->list, &xprt->sc_ctxts);
}
return true;
}
@@ -189,15 +188,15 @@ struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
- spin_lock_bh(&xprt->sc_ctxt_lock);
+ spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used++;
if (list_empty(&xprt->sc_ctxts))
goto out_empty;
ctxt = list_first_entry(&xprt->sc_ctxts,
- struct svc_rdma_op_ctxt, free);
- list_del_init(&ctxt->free);
- spin_unlock_bh(&xprt->sc_ctxt_lock);
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
+ spin_unlock(&xprt->sc_ctxt_lock);
out:
ctxt->count = 0;
@@ -209,15 +208,15 @@ out_empty:
/* Either pre-allocation missed the mark, or send
* queue accounting is broken.
*/
- spin_unlock_bh(&xprt->sc_ctxt_lock);
+ spin_unlock(&xprt->sc_ctxt_lock);
ctxt = alloc_ctxt(xprt, GFP_NOIO);
if (ctxt)
goto out;
- spin_lock_bh(&xprt->sc_ctxt_lock);
+ spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--;
- spin_unlock_bh(&xprt->sc_ctxt_lock);
+ spin_unlock(&xprt->sc_ctxt_lock);
WARN_ONCE(1, "svcrdma: empty RDMA ctxt list?\n");
return NULL;
}
@@ -254,10 +253,10 @@ void svc_rdma_put_context(struct svc_rdma_op_ctxt *ctxt, int free_pages)
for (i = 0; i < ctxt->count; i++)
put_page(ctxt->pages[i]);
- spin_lock_bh(&xprt->sc_ctxt_lock);
+ spin_lock(&xprt->sc_ctxt_lock);
xprt->sc_ctxt_used--;
- list_add(&ctxt->free, &xprt->sc_ctxts);
- spin_unlock_bh(&xprt->sc_ctxt_lock);
+ list_add(&ctxt->list, &xprt->sc_ctxts);
+ spin_unlock(&xprt->sc_ctxt_lock);
}
static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
@@ -266,8 +265,8 @@ static void svc_rdma_destroy_ctxts(struct svcxprt_rdma *xprt)
struct svc_rdma_op_ctxt *ctxt;
ctxt = list_first_entry(&xprt->sc_ctxts,
- struct svc_rdma_op_ctxt, free);
- list_del(&ctxt->free);
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
kfree(ctxt);
}
}
@@ -404,7 +403,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
/* All wc fields are now known to be valid */
ctxt->byte_len = wc->byte_len;
spin_lock(&xprt->sc_rq_dto_lock);
- list_add_tail(&ctxt->dto_q, &xprt->sc_rq_dto_q);
+ list_add_tail(&ctxt->list, &xprt->sc_rq_dto_q);
spin_unlock(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
@@ -525,7 +524,7 @@ void svc_rdma_wc_read(struct ib_cq *cq, struct ib_wc *wc)
read_hdr = ctxt->read_hdr;
spin_lock(&xprt->sc_rq_dto_lock);
- list_add_tail(&read_hdr->dto_q,
+ list_add_tail(&read_hdr->list,
&xprt->sc_read_complete_q);
spin_unlock(&xprt->sc_rq_dto_lock);
@@ -557,7 +556,6 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
return NULL;
svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
- INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
INIT_LIST_HEAD(&cma_xprt->sc_read_complete_q);
INIT_LIST_HEAD(&cma_xprt->sc_frmr_q);
@@ -571,6 +569,14 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
spin_lock_init(&cma_xprt->sc_ctxt_lock);
spin_lock_init(&cma_xprt->sc_map_lock);
+ /*
+ * Note that this implies that the underlying transport support
+ * has some form of congestion control (see RFC 7530 section 3.1
+ * paragraph 2). For now, we assume that all supported RDMA
+ * transports are suitable here.
+ */
+ set_bit(XPT_CONG_CTRL, &cma_xprt->sc_xprt.xpt_flags);
+
if (listener)
set_bit(XPT_LISTENER, &cma_xprt->sc_xprt.xpt_flags);
@@ -923,14 +929,14 @@ struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *rdma)
{
struct svc_rdma_fastreg_mr *frmr = NULL;
- spin_lock_bh(&rdma->sc_frmr_q_lock);
+ spin_lock(&rdma->sc_frmr_q_lock);
if (!list_empty(&rdma->sc_frmr_q)) {
frmr = list_entry(rdma->sc_frmr_q.next,
struct svc_rdma_fastreg_mr, frmr_list);
list_del_init(&frmr->frmr_list);
frmr->sg_nents = 0;
}
- spin_unlock_bh(&rdma->sc_frmr_q_lock);
+ spin_unlock(&rdma->sc_frmr_q_lock);
if (frmr)
return frmr;
@@ -943,10 +949,10 @@ void svc_rdma_put_frmr(struct svcxprt_rdma *rdma,
if (frmr) {
ib_dma_unmap_sg(rdma->sc_cm_id->device,
frmr->sg, frmr->sg_nents, frmr->direction);
- spin_lock_bh(&rdma->sc_frmr_q_lock);
+ spin_lock(&rdma->sc_frmr_q_lock);
WARN_ON_ONCE(!list_empty(&frmr->frmr_list));
list_add(&frmr->frmr_list, &rdma->sc_frmr_q);
- spin_unlock_bh(&rdma->sc_frmr_q_lock);
+ spin_unlock(&rdma->sc_frmr_q_lock);
}
}
@@ -1002,6 +1008,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_req_size = svcrdma_max_req_size;
newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_requests);
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
svcrdma_max_bc_requests);
newxprt->sc_rq_depth = newxprt->sc_max_requests +
@@ -1027,13 +1034,13 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
goto errout;
}
newxprt->sc_sq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_sq_depth,
- 0, IB_POLL_SOFTIRQ);
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_sq_cq)) {
dprintk("svcrdma: error creating SQ CQ for connect request\n");
goto errout;
}
newxprt->sc_rq_cq = ib_alloc_cq(dev, newxprt, newxprt->sc_rq_depth,
- 0, IB_POLL_SOFTIRQ);
+ 0, IB_POLL_WORKQUEUE);
if (IS_ERR(newxprt->sc_rq_cq)) {
dprintk("svcrdma: error creating RQ CQ for connect request\n");
goto errout;
@@ -1213,20 +1220,18 @@ static void __svc_rdma_free(struct work_struct *work)
*/
while (!list_empty(&rdma->sc_read_complete_q)) {
struct svc_rdma_op_ctxt *ctxt;
- ctxt = list_entry(rdma->sc_read_complete_q.next,
- struct svc_rdma_op_ctxt,
- dto_q);
- list_del_init(&ctxt->dto_q);
+ ctxt = list_first_entry(&rdma->sc_read_complete_q,
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
svc_rdma_put_context(ctxt, 1);
}
/* Destroy queued, but not processed recv completions */
while (!list_empty(&rdma->sc_rq_dto_q)) {
struct svc_rdma_op_ctxt *ctxt;
- ctxt = list_entry(rdma->sc_rq_dto_q.next,
- struct svc_rdma_op_ctxt,
- dto_q);
- list_del_init(&ctxt->dto_q);
+ ctxt = list_first_entry(&rdma->sc_rq_dto_q,
+ struct svc_rdma_op_ctxt, list);
+ list_del(&ctxt->list);
svc_rdma_put_context(ctxt, 1);
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 534c178d2a7e..c717f5410776 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -67,7 +67,7 @@ unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
static unsigned int xprt_rdma_inline_write_padding;
static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
- int xprt_rdma_pad_optimize = 1;
+ int xprt_rdma_pad_optimize = 0;
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
@@ -709,10 +709,6 @@ xprt_rdma_send_request(struct rpc_task *task)
return 0;
failed_marshal:
- dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
- __func__, rc);
- if (rc == -EIO)
- r_xprt->rx_stats.failed_marshal_count++;
if (rc != -ENOTCONN)
return rc;
drop_connection:
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 11d07748f699..81cd31acf690 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -54,6 +54,7 @@
#include <linux/sunrpc/svc_rdma.h>
#include <asm/bitops.h>
#include <linux/module.h> /* try_module_get()/module_put() */
+#include <rdma/ib_cm.h>
#include "xprt_rdma.h"
@@ -208,6 +209,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
/* Default settings for RPC-over-RDMA Version One */
r_xprt->rx_ia.ri_reminv_expected = false;
+ r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
@@ -215,6 +217,7 @@ rpcrdma_update_connect_private(struct rpcrdma_xprt *r_xprt,
pmsg->cp_magic == rpcrdma_cmp_magic &&
pmsg->cp_version == RPCRDMA_CMP_VERSION) {
r_xprt->rx_ia.ri_reminv_expected = true;
+ r_xprt->rx_ia.ri_implicit_roundup = true;
rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
}
@@ -277,7 +280,14 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
connstate = -ENETDOWN;
goto connected;
case RDMA_CM_EVENT_REJECTED:
+#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
+ pr_info("rpcrdma: connection to %pIS:%u on %s rejected: %s\n",
+ sap, rpc_get_port(sap), ia->ri_device->name,
+ rdma_reject_msg(id, event->status));
+#endif
connstate = -ECONNREFUSED;
+ if (event->status == IB_CM_REJ_STALE_CONN)
+ connstate = -EAGAIN;
goto connected;
case RDMA_CM_EVENT_DISCONNECTED:
connstate = -ECONNABORTED;
@@ -486,18 +496,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
*/
int
rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
- struct rpcrdma_create_data_internal *cdata)
+ struct rpcrdma_create_data_internal *cdata)
{
struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private;
+ unsigned int max_qp_wr, max_sge;
struct ib_cq *sendcq, *recvcq;
- unsigned int max_qp_wr;
int rc;
- if (ia->ri_device->attrs.max_sge < RPCRDMA_MAX_SEND_SGES) {
- dprintk("RPC: %s: insufficient sge's available\n",
- __func__);
+ max_sge = min(ia->ri_device->attrs.max_sge, RPCRDMA_MAX_SEND_SGES);
+ if (max_sge < RPCRDMA_MIN_SEND_SGES) {
+ pr_warn("rpcrdma: HCA provides only %d send SGEs\n", max_sge);
return -ENOMEM;
}
+ ia->ri_max_send_sges = max_sge - RPCRDMA_MIN_SEND_SGES;
if (ia->ri_device->attrs.max_qp_wr <= RPCRDMA_BACKWARD_WRS) {
dprintk("RPC: %s: insufficient wqe's available\n",
@@ -522,7 +533,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_wr = cdata->max_requests;
ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
ep->rep_attr.cap.max_recv_wr += 1; /* drain cqe */
- ep->rep_attr.cap.max_send_sge = RPCRDMA_MAX_SEND_SGES;
+ ep->rep_attr.cap.max_send_sge = max_sge;
ep->rep_attr.cap.max_recv_sge = 1;
ep->rep_attr.cap.max_inline_data = 0;
ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
@@ -640,20 +651,21 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
int
rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
+ struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
+ rx_ia);
struct rdma_cm_id *id, *old;
+ struct sockaddr *sap;
+ unsigned int extras;
int rc = 0;
- int retry_count = 0;
if (ep->rep_connected != 0) {
- struct rpcrdma_xprt *xprt;
retry:
dprintk("RPC: %s: reconnecting...\n", __func__);
rpcrdma_ep_disconnect(ep, ia);
- xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
- id = rpcrdma_create_id(xprt, ia,
- (struct sockaddr *)&xprt->rx_data.addr);
+ sap = (struct sockaddr *)&r_xprt->rx_data.addr;
+ id = rpcrdma_create_id(r_xprt, ia, sap);
if (IS_ERR(id)) {
rc = -EHOSTUNREACH;
goto out;
@@ -708,51 +720,18 @@ retry:
}
wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0);
-
- /*
- * Check state. A non-peer reject indicates no listener
- * (ECONNREFUSED), which may be a transient state. All
- * others indicate a transport condition which has already
- * undergone a best-effort.
- */
- if (ep->rep_connected == -ECONNREFUSED &&
- ++retry_count <= RDMA_CONNECT_RETRY_MAX) {
- dprintk("RPC: %s: non-peer_reject, retry\n", __func__);
- goto retry;
- }
if (ep->rep_connected <= 0) {
- /* Sometimes, the only way to reliably connect to remote
- * CMs is to use same nonzero values for ORD and IRD. */
- if (retry_count++ <= RDMA_CONNECT_RETRY_MAX + 1 &&
- (ep->rep_remote_cma.responder_resources == 0 ||
- ep->rep_remote_cma.initiator_depth !=
- ep->rep_remote_cma.responder_resources)) {
- if (ep->rep_remote_cma.responder_resources == 0)
- ep->rep_remote_cma.responder_resources = 1;
- ep->rep_remote_cma.initiator_depth =
- ep->rep_remote_cma.responder_resources;
+ if (ep->rep_connected == -EAGAIN)
goto retry;
- }
rc = ep->rep_connected;
- } else {
- struct rpcrdma_xprt *r_xprt;
- unsigned int extras;
-
- dprintk("RPC: %s: connected\n", __func__);
-
- r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
- extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
-
- if (extras) {
- rc = rpcrdma_ep_post_extra_recv(r_xprt, extras);
- if (rc) {
- pr_warn("%s: rpcrdma_ep_post_extra_recv: %i\n",
- __func__, rc);
- rc = 0;
- }
- }
+ goto out;
}
+ dprintk("RPC: %s: connected\n", __func__);
+ extras = r_xprt->rx_buf.rb_bc_srv_max_requests;
+ if (extras)
+ rpcrdma_ep_post_extra_recv(r_xprt, extras);
+
out:
if (rc)
ep->rep_connected = rc;
@@ -797,9 +776,7 @@ rpcrdma_mr_recovery_worker(struct work_struct *work)
spin_lock(&buf->rb_recovery_lock);
while (!list_empty(&buf->rb_stale_mrs)) {
- mw = list_first_entry(&buf->rb_stale_mrs,
- struct rpcrdma_mw, mw_list);
- list_del_init(&mw->mw_list);
+ mw = rpcrdma_pop_mw(&buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
dprintk("RPC: %s: recovering MR %p\n", __func__, mw);
@@ -817,7 +794,7 @@ rpcrdma_defer_mr_recovery(struct rpcrdma_mw *mw)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_recovery_lock);
- list_add(&mw->mw_list, &buf->rb_stale_mrs);
+ rpcrdma_push_mw(mw, &buf->rb_stale_mrs);
spin_unlock(&buf->rb_recovery_lock);
schedule_delayed_work(&buf->rb_recovery_worker, 0);
@@ -1093,11 +1070,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_mw *mw = NULL;
spin_lock(&buf->rb_mwlock);
- if (!list_empty(&buf->rb_mws)) {
- mw = list_first_entry(&buf->rb_mws,
- struct rpcrdma_mw, mw_list);
- list_del_init(&mw->mw_list);
- }
+ if (!list_empty(&buf->rb_mws))
+ mw = rpcrdma_pop_mw(&buf->rb_mws);
spin_unlock(&buf->rb_mwlock);
if (!mw)
@@ -1120,7 +1094,7 @@ rpcrdma_put_mw(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mw *mw)
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
spin_lock(&buf->rb_mwlock);
- list_add_tail(&mw->mw_list, &buf->rb_mws);
+ rpcrdma_push_mw(mw, &buf->rb_mws);
spin_unlock(&buf->rb_mwlock);
}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index e35efd4ac1e4..171a35116de9 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -74,7 +74,9 @@ struct rpcrdma_ia {
unsigned int ri_max_frmr_depth;
unsigned int ri_max_inline_write;
unsigned int ri_max_inline_read;
+ unsigned int ri_max_send_sges;
bool ri_reminv_expected;
+ bool ri_implicit_roundup;
enum ib_mr_type ri_mrtype;
struct ib_qp_attr ri_qp_attr;
struct ib_qp_init_attr ri_qp_init_attr;
@@ -303,15 +305,19 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
char *mr_offset; /* kva if no page, else offset */
};
-/* Reserve enough Send SGEs to send a maximum size inline request:
+/* The Send SGE array is provisioned to send a maximum size
+ * inline request:
* - RPC-over-RDMA header
* - xdr_buf head iovec
- * - RPCRDMA_MAX_INLINE bytes, possibly unaligned, in pages
+ * - RPCRDMA_MAX_INLINE bytes, in pages
* - xdr_buf tail iovec
+ *
+ * The actual number of array elements consumed by each RPC
+ * depends on the device's max_sge limit.
*/
enum {
- RPCRDMA_MAX_SEND_PAGES = PAGE_SIZE + RPCRDMA_MAX_INLINE - 1,
- RPCRDMA_MAX_PAGE_SGES = (RPCRDMA_MAX_SEND_PAGES >> PAGE_SHIFT) + 1,
+ RPCRDMA_MIN_SEND_SGES = 3,
+ RPCRDMA_MAX_PAGE_SGES = RPCRDMA_MAX_INLINE >> PAGE_SHIFT,
RPCRDMA_MAX_SEND_SGES = 1 + 1 + RPCRDMA_MAX_PAGE_SGES + 1,
};
@@ -348,6 +354,22 @@ rpcr_to_rdmar(struct rpc_rqst *rqst)
return rqst->rq_xprtdata;
}
+static inline void
+rpcrdma_push_mw(struct rpcrdma_mw *mw, struct list_head *list)
+{
+ list_add_tail(&mw->mw_list, list);
+}
+
+static inline struct rpcrdma_mw *
+rpcrdma_pop_mw(struct list_head *list)
+{
+ struct rpcrdma_mw *mw;
+
+ mw = list_first_entry(list, struct rpcrdma_mw, mw_list);
+ list_del(&mw->mw_list);
+ return mw;
+}
+
/*
* struct rpcrdma_buffer -- holds list/queue of pre-registered memory for
* inline requests/replies, and client/server credits.
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 956c7bce80d1..16aff8ddc16f 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,8 @@
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+ struct socket *sock);
/*
* xprtsock tunables
@@ -666,6 +668,9 @@ static int xs_tcp_send_request(struct rpc_task *task)
if (task->tk_flags & RPC_TASK_SENT)
zerocopy = false;
+ if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
+ xs_tcp_set_socket_timeouts(xprt, transport->sock);
+
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
@@ -1734,7 +1739,9 @@ static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t
*/
static void xs_udp_timer(struct rpc_xprt *xprt, struct rpc_task *task)
{
+ spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, -ETIMEDOUT);
+ spin_unlock_bh(&xprt->transport_lock);
}
static unsigned short xs_get_random_port(void)
@@ -2235,6 +2242,66 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
xs_reset_transport(transport);
}
+static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
+ struct socket *sock)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ unsigned int keepidle;
+ unsigned int keepcnt;
+ unsigned int opt_on = 1;
+ unsigned int timeo;
+
+ spin_lock_bh(&xprt->transport_lock);
+ keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
+ keepcnt = xprt->timeout->to_retries + 1;
+ timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
+ (xprt->timeout->to_retries + 1);
+ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+ spin_unlock_bh(&xprt->transport_lock);
+
+ /* TCP Keepalive options */
+ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+ (char *)&opt_on, sizeof(opt_on));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
+ (char *)&keepidle, sizeof(keepidle));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
+ (char *)&keepidle, sizeof(keepidle));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
+ (char *)&keepcnt, sizeof(keepcnt));
+
+ /* TCP user timeout (see RFC5482) */
+ kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
+ (char *)&timeo, sizeof(timeo));
+}
+
+static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
+ unsigned long connect_timeout,
+ unsigned long reconnect_timeout)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ struct rpc_timeout to;
+ unsigned long initval;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (reconnect_timeout < xprt->max_reconnect_timeout)
+ xprt->max_reconnect_timeout = reconnect_timeout;
+ if (connect_timeout < xprt->connect_timeout) {
+ memcpy(&to, xprt->timeout, sizeof(to));
+ initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
+ /* Arbitrary lower limit */
+ if (initval < XS_TCP_INIT_REEST_TO << 1)
+ initval = XS_TCP_INIT_REEST_TO << 1;
+ to.to_initval = initval;
+ to.to_maxval = initval;
+ memcpy(&transport->tcp_timeout, &to,
+ sizeof(transport->tcp_timeout));
+ xprt->timeout = &transport->tcp_timeout;
+ xprt->connect_timeout = connect_timeout;
+ }
+ set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2242,22 +2309,8 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
- unsigned int keepidle = xprt->timeout->to_initval / HZ;
- unsigned int keepcnt = xprt->timeout->to_retries + 1;
- unsigned int opt_on = 1;
- unsigned int timeo;
unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
- /* TCP Keepalive options */
- kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
- (char *)&opt_on, sizeof(opt_on));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
- (char *)&keepidle, sizeof(keepidle));
- kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
- (char *)&keepcnt, sizeof(keepcnt));
-
/* Avoid temporary address, they are bad for long-lived
* connections such as NFS mounts.
* RFC4941, section 3.6 suggests that:
@@ -2268,11 +2321,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
(char *)&addr_pref, sizeof(addr_pref));
- /* TCP user timeout (see RFC5482) */
- timeo = jiffies_to_msecs(xprt->timeout->to_initval) *
- (xprt->timeout->to_retries + 1);
- kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
- (char *)&timeo, sizeof(timeo));
+ xs_tcp_set_socket_timeouts(xprt, sock);
write_lock_bh(&sk->sk_callback_lock);
@@ -2721,6 +2770,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_tcp_shutdown,
.destroy = xs_destroy,
+ .set_connect_timeout = xs_tcp_set_connect_timeout,
.print_stats = xs_tcp_print_stats,
.enable_swap = xs_enable_swap,
.disable_swap = xs_disable_swap,
@@ -3007,6 +3057,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
+ xprt->connect_timeout = xprt->timeout->to_initval *
+ (xprt->timeout->to_retries + 1);
INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
@@ -3209,7 +3261,9 @@ static int param_set_uint_minmax(const char *val,
if (!val)
return -EINVAL;
ret = kstrtouint(val, 0, &num);
- if (ret == -EINVAL || num < min || num > max)
+ if (ret)
+ return ret;
+ if (num < min || num > max)
return -EINVAL;
*((unsigned int *)kp->arg) = num;
return 0;
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 6b09a778cc71..43e4045e72bc 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -35,6 +35,8 @@
*/
#include <linux/rhashtable.h>
+#include <linux/sched/signal.h>
+
#include "core.h"
#include "name_table.h"
#include "node.h"
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e2d18b9f910f..ee37b390260a 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -85,7 +85,7 @@
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/signal.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 8a398b3fb532..9192ead66751 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -90,6 +90,7 @@
#include <linux/init.h>
#include <linux/io.h>
#include <linux/kernel.h>
+#include <linux/sched/signal.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/miscdevice.h>
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 6788264acc63..9d24c0e958b1 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -532,7 +532,8 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
vsock->vdev = vdev;
ret = vsock->vdev->config->find_vqs(vsock->vdev, VSOCK_VQ_MAX,
- vsock->vqs, callbacks, names);
+ vsock->vqs, callbacks, names,
+ NULL);
if (ret < 0)
goto out;
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index 849c4ad0411e..8d592a45b597 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -9,6 +9,7 @@
*/
#include <linux/spinlock.h>
#include <linux/module.h>
+#include <linux/sched/signal.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/virtio.h>
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 079c883aa96e..fd28a49dbe8f 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -41,7 +41,7 @@
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/kernel.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
#include <linux/timer.h>
#include <linux/string.h>
#include <linux/net.h>