From a3be19b91ea7121d388084e8c07f5b1b982eb40c Mon Sep 17 00:00:00 2001
From: Wenchao Hao <haowenchao@huawei.com>
Date: Sat, 26 Nov 2022 09:07:52 +0800
Subject: scsi: iscsi: Fix multiple iSCSI session unbind events sent to
 userspace

It was observed that the kernel would potentially send
ISCSI_KEVENT_UNBIND_SESSION multiple times. Introduce 'target_state' in
iscsi_cls_session() to make sure session will send only one unbind session
event.

This introduces a regression wrt. the issue fixed in commit 13e60d3ba287
("scsi: iscsi: Report unbind session event when the target has been
removed"). If iscsid dies for any reason after sending an unbind session to
kernel, once iscsid is restarted, the kernel's ISCSI_KEVENT_UNBIND_SESSION
event is lost and userspace is then unable to logout. However, the session
is actually in invalid state (its target_id is INVALID) so iscsid should
not sync this session during restart.

Consequently we need to check the session's target state during iscsid
restart.  If session is in unbound state, do not sync this session and
perform session teardown. This is OK because once a session is unbound, we
can not recover it any more (mainly because its target id is INVALID).

Signed-off-by: Wenchao Hao <haowenchao@huawei.com>
Link: https://lore.kernel.org/r/20221126010752.231917-1-haowenchao@huawei.com
Reviewed-by: Mike Christie <michael.christie@oracle.com>
Reviewed-by: Wu Bo <wubo40@huawei.com>
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
---
 include/scsi/scsi_transport_iscsi.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h
index cab52b0f11d0..34c03707fb6e 100644
--- a/include/scsi/scsi_transport_iscsi.h
+++ b/include/scsi/scsi_transport_iscsi.h
@@ -236,6 +236,14 @@ enum {
 	ISCSI_SESSION_FREE,
 };
 
+enum {
+	ISCSI_SESSION_TARGET_UNBOUND,
+	ISCSI_SESSION_TARGET_ALLOCATED,
+	ISCSI_SESSION_TARGET_SCANNED,
+	ISCSI_SESSION_TARGET_UNBINDING,
+	ISCSI_SESSION_TARGET_MAX,
+};
+
 #define ISCSI_MAX_TARGET -1
 
 struct iscsi_cls_session {
@@ -264,6 +272,7 @@ struct iscsi_cls_session {
 	 */
 	pid_t creator;
 	int state;
+	int target_state;			/* session target bind state */
 	int sid;				/* session id */
 	void *dd_data;				/* LLD private data */
 	struct device dev;	/* sysfs transport/container device */
-- 
cgit v1.3.1


From 7cffcade57a429667447c4f41d8414bbcf1b3aaa Mon Sep 17 00:00:00 2001
From: Dawei Li <set_pte_at@outlook.com>
Date: Tue, 13 Dec 2022 23:46:52 +0800
Subject: xen: make remove callback of xen driver void returned

Since commit fc7a6209d571 ("bus: Make remove callback return void")
forces bus_type::remove be void-returned, it doesn't make much sense for
any bus based driver implementing remove callbalk to return non-void to
its caller.

This change is for xen bus based drivers.

Acked-by: Juergen Gross <jgross@suse.com>
Signed-off-by: Dawei Li <set_pte_at@outlook.com>
Link: https://lore.kernel.org/r/TYCP286MB23238119AB4DF190997075C9CAE39@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 drivers/block/xen-blkback/xenbus.c  | 4 +---
 drivers/block/xen-blkfront.c        | 3 +--
 drivers/char/tpm/xen-tpmfront.c     | 3 +--
 drivers/gpu/drm/xen/xen_drm_front.c | 3 +--
 drivers/input/misc/xen-kbdfront.c   | 5 ++---
 drivers/net/xen-netback/xenbus.c    | 3 +--
 drivers/net/xen-netfront.c          | 4 +---
 drivers/pci/xen-pcifront.c          | 4 +---
 drivers/scsi/xen-scsifront.c        | 4 +---
 drivers/tty/hvc/hvc_xen.c           | 4 ++--
 drivers/usb/host/xen-hcd.c          | 4 +---
 drivers/video/fbdev/xen-fbfront.c   | 6 ++----
 drivers/xen/pvcalls-back.c          | 3 +--
 drivers/xen/pvcalls-front.c         | 3 +--
 drivers/xen/xen-pciback/xenbus.c    | 4 +---
 drivers/xen/xen-scsiback.c          | 4 +---
 include/xen/xenbus.h                | 2 +-
 net/9p/trans_xen.c                  | 3 +--
 sound/xen/xen_snd_front.c           | 3 +--
 19 files changed, 22 insertions(+), 47 deletions(-)

(limited to 'include')

diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index c0227dfa4688..4807af1d5805 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -524,7 +524,7 @@ static int xen_vbd_create(struct xen_blkif *blkif, blkif_vdev_t handle,
 	return 0;
 }
 
-static int xen_blkbk_remove(struct xenbus_device *dev)
+static void xen_blkbk_remove(struct xenbus_device *dev)
 {
 	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
@@ -547,8 +547,6 @@ static int xen_blkbk_remove(struct xenbus_device *dev)
 		/* Put the reference we set in xen_blkif_alloc(). */
 		xen_blkif_put(be->blkif);
 	}
-
-	return 0;
 }
 
 int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 35b9bcad9db9..e68576ded7cb 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2468,7 +2468,7 @@ static void blkback_changed(struct xenbus_device *dev,
 	}
 }
 
-static int blkfront_remove(struct xenbus_device *xbdev)
+static void blkfront_remove(struct xenbus_device *xbdev)
 {
 	struct blkfront_info *info = dev_get_drvdata(&xbdev->dev);
 
@@ -2489,7 +2489,6 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 	}
 
 	kfree(info);
-	return 0;
 }
 
 static int blkfront_is_ready(struct xenbus_device *dev)
diff --git a/drivers/char/tpm/xen-tpmfront.c b/drivers/char/tpm/xen-tpmfront.c
index 379291826261..80cca3b83b22 100644
--- a/drivers/char/tpm/xen-tpmfront.c
+++ b/drivers/char/tpm/xen-tpmfront.c
@@ -360,14 +360,13 @@ static int tpmfront_probe(struct xenbus_device *dev,
 	return tpm_chip_register(priv->chip);
 }
 
-static int tpmfront_remove(struct xenbus_device *dev)
+static void tpmfront_remove(struct xenbus_device *dev)
 {
 	struct tpm_chip *chip = dev_get_drvdata(&dev->dev);
 	struct tpm_private *priv = dev_get_drvdata(&chip->dev);
 	tpm_chip_unregister(chip);
 	ring_free(priv);
 	dev_set_drvdata(&chip->dev, NULL);
-	return 0;
 }
 
 static int tpmfront_resume(struct xenbus_device *dev)
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 0d8e6bd1ccbf..90996c108146 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -717,7 +717,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
 	return xenbus_switch_state(xb_dev, XenbusStateInitialising);
 }
 
-static int xen_drv_remove(struct xenbus_device *dev)
+static void xen_drv_remove(struct xenbus_device *dev)
 {
 	struct xen_drm_front_info *front_info = dev_get_drvdata(&dev->dev);
 	int to = 100;
@@ -751,7 +751,6 @@ static int xen_drv_remove(struct xenbus_device *dev)
 
 	xen_drm_drv_fini(front_info);
 	xenbus_frontend_closed(dev);
-	return 0;
 }
 
 static const struct xenbus_device_id xen_driver_ids[] = {
diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c
index 8d8ebdc2039b..67f1c7364c95 100644
--- a/drivers/input/misc/xen-kbdfront.c
+++ b/drivers/input/misc/xen-kbdfront.c
@@ -51,7 +51,7 @@ module_param_array(ptr_size, int, NULL, 0444);
 MODULE_PARM_DESC(ptr_size,
 	"Pointing device width, height in pixels (default 800,600)");
 
-static int xenkbd_remove(struct xenbus_device *);
+static void xenkbd_remove(struct xenbus_device *);
 static int xenkbd_connect_backend(struct xenbus_device *, struct xenkbd_info *);
 static void xenkbd_disconnect_backend(struct xenkbd_info *);
 
@@ -404,7 +404,7 @@ static int xenkbd_resume(struct xenbus_device *dev)
 	return xenkbd_connect_backend(dev, info);
 }
 
-static int xenkbd_remove(struct xenbus_device *dev)
+static void xenkbd_remove(struct xenbus_device *dev)
 {
 	struct xenkbd_info *info = dev_get_drvdata(&dev->dev);
 
@@ -417,7 +417,6 @@ static int xenkbd_remove(struct xenbus_device *dev)
 		input_unregister_device(info->mtouch);
 	free_page((unsigned long)info->page);
 	kfree(info);
-	return 0;
 }
 
 static int xenkbd_connect_backend(struct xenbus_device *dev,
diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
index c1ba4294f364..001636901dda 100644
--- a/drivers/net/xen-netback/xenbus.c
+++ b/drivers/net/xen-netback/xenbus.c
@@ -977,7 +977,7 @@ static int read_xenbus_vif_flags(struct backend_info *be)
 	return 0;
 }
 
-static int netback_remove(struct xenbus_device *dev)
+static void netback_remove(struct xenbus_device *dev)
 {
 	struct backend_info *be = dev_get_drvdata(&dev->dev);
 
@@ -992,7 +992,6 @@ static int netback_remove(struct xenbus_device *dev)
 	kfree(be->hotplug_script);
 	kfree(be);
 	dev_set_drvdata(&dev->dev, NULL);
-	return 0;
 }
 
 /*
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 9af2b027c19c..bc17f5391b1a 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -2640,7 +2640,7 @@ static void xennet_bus_close(struct xenbus_device *dev)
 	} while (!ret);
 }
 
-static int xennet_remove(struct xenbus_device *dev)
+static void xennet_remove(struct xenbus_device *dev)
 {
 	struct netfront_info *info = dev_get_drvdata(&dev->dev);
 
@@ -2656,8 +2656,6 @@ static int xennet_remove(struct xenbus_device *dev)
 		rtnl_unlock();
 	}
 	xennet_free_netdev(info->netdev);
-
-	return 0;
 }
 
 static const struct xenbus_device_id netfront_ids[] = {
diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
index 7378e2f3e525..fcd029ca2eb1 100644
--- a/drivers/pci/xen-pcifront.c
+++ b/drivers/pci/xen-pcifront.c
@@ -1055,14 +1055,12 @@ out:
 	return err;
 }
 
-static int pcifront_xenbus_remove(struct xenbus_device *xdev)
+static void pcifront_xenbus_remove(struct xenbus_device *xdev)
 {
 	struct pcifront_device *pdev = dev_get_drvdata(&xdev->dev);
 
 	if (pdev)
 		free_pdev(pdev);
-
-	return 0;
 }
 
 static const struct xenbus_device_id xenpci_ids[] = {
diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c
index 66b316d173b0..71a3bb83984c 100644
--- a/drivers/scsi/xen-scsifront.c
+++ b/drivers/scsi/xen-scsifront.c
@@ -995,7 +995,7 @@ static int scsifront_suspend(struct xenbus_device *dev)
 	return err;
 }
 
-static int scsifront_remove(struct xenbus_device *dev)
+static void scsifront_remove(struct xenbus_device *dev)
 {
 	struct vscsifrnt_info *info = dev_get_drvdata(&dev->dev);
 
@@ -1011,8 +1011,6 @@ static int scsifront_remove(struct xenbus_device *dev)
 
 	scsifront_free_ring(info);
 	scsi_host_put(info->host);
-
-	return 0;
 }
 
 static void scsifront_disconnect(struct vscsifrnt_info *info)
diff --git a/drivers/tty/hvc/hvc_xen.c b/drivers/tty/hvc/hvc_xen.c
index 7c23112dc923..c879f922c716 100644
--- a/drivers/tty/hvc/hvc_xen.c
+++ b/drivers/tty/hvc/hvc_xen.c
@@ -394,9 +394,9 @@ static int xen_console_remove(struct xencons_info *info)
 	return 0;
 }
 
-static int xencons_remove(struct xenbus_device *dev)
+static void xencons_remove(struct xenbus_device *dev)
 {
-	return xen_console_remove(dev_get_drvdata(&dev->dev));
+	xen_console_remove(dev_get_drvdata(&dev->dev));
 }
 
 static int xencons_connect_backend(struct xenbus_device *dev,
diff --git a/drivers/usb/host/xen-hcd.c b/drivers/usb/host/xen-hcd.c
index de1b09158318..46fdab940092 100644
--- a/drivers/usb/host/xen-hcd.c
+++ b/drivers/usb/host/xen-hcd.c
@@ -1530,15 +1530,13 @@ static void xenhcd_backend_changed(struct xenbus_device *dev,
 	}
 }
 
-static int xenhcd_remove(struct xenbus_device *dev)
+static void xenhcd_remove(struct xenbus_device *dev)
 {
 	struct xenhcd_info *info = dev_get_drvdata(&dev->dev);
 	struct usb_hcd *hcd = xenhcd_info_to_hcd(info);
 
 	xenhcd_destroy_rings(info);
 	usb_put_hcd(hcd);
-
-	return 0;
 }
 
 static int xenhcd_probe(struct xenbus_device *dev,
diff --git a/drivers/video/fbdev/xen-fbfront.c b/drivers/video/fbdev/xen-fbfront.c
index 4d2694d904aa..ae8a50ecdbd3 100644
--- a/drivers/video/fbdev/xen-fbfront.c
+++ b/drivers/video/fbdev/xen-fbfront.c
@@ -67,7 +67,7 @@ MODULE_PARM_DESC(video,
 	"Video memory size in MB, width, height in pixels (default 2,800,600)");
 
 static void xenfb_make_preferred_console(void);
-static int xenfb_remove(struct xenbus_device *);
+static void xenfb_remove(struct xenbus_device *);
 static void xenfb_init_shared_page(struct xenfb_info *, struct fb_info *);
 static int xenfb_connect_backend(struct xenbus_device *, struct xenfb_info *);
 static void xenfb_disconnect_backend(struct xenfb_info *);
@@ -527,7 +527,7 @@ static int xenfb_resume(struct xenbus_device *dev)
 	return xenfb_connect_backend(dev, info);
 }
 
-static int xenfb_remove(struct xenbus_device *dev)
+static void xenfb_remove(struct xenbus_device *dev)
 {
 	struct xenfb_info *info = dev_get_drvdata(&dev->dev);
 
@@ -542,8 +542,6 @@ static int xenfb_remove(struct xenbus_device *dev)
 	vfree(info->gfns);
 	vfree(info->fb);
 	kfree(info);
-
-	return 0;
 }
 
 static unsigned long vmalloc_to_gfn(void *address)
diff --git a/drivers/xen/pvcalls-back.c b/drivers/xen/pvcalls-back.c
index d6f945fd4147..ea52a2092bb8 100644
--- a/drivers/xen/pvcalls-back.c
+++ b/drivers/xen/pvcalls-back.c
@@ -1181,9 +1181,8 @@ static void pvcalls_back_changed(struct xenbus_device *dev,
 	}
 }
 
-static int pvcalls_back_remove(struct xenbus_device *dev)
+static void pvcalls_back_remove(struct xenbus_device *dev)
 {
-	return 0;
 }
 
 static int pvcalls_back_uevent(struct xenbus_device *xdev,
diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c
index 1826e8e67125..5328f4d35f25 100644
--- a/drivers/xen/pvcalls-front.c
+++ b/drivers/xen/pvcalls-front.c
@@ -1085,7 +1085,7 @@ static const struct xenbus_device_id pvcalls_front_ids[] = {
 	{ "" }
 };
 
-static int pvcalls_front_remove(struct xenbus_device *dev)
+static void pvcalls_front_remove(struct xenbus_device *dev)
 {
 	struct pvcalls_bedata *bedata;
 	struct sock_mapping *map = NULL, *n;
@@ -1121,7 +1121,6 @@ static int pvcalls_front_remove(struct xenbus_device *dev)
 	kfree(bedata->ring.sring);
 	kfree(bedata);
 	xenbus_switch_state(dev, XenbusStateClosed);
-	return 0;
 }
 
 static int pvcalls_front_probe(struct xenbus_device *dev,
diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c
index d171091eec12..b11e401f1b1e 100644
--- a/drivers/xen/xen-pciback/xenbus.c
+++ b/drivers/xen/xen-pciback/xenbus.c
@@ -716,14 +716,12 @@ out:
 	return err;
 }
 
-static int xen_pcibk_xenbus_remove(struct xenbus_device *dev)
+static void xen_pcibk_xenbus_remove(struct xenbus_device *dev)
 {
 	struct xen_pcibk_device *pdev = dev_get_drvdata(&dev->dev);
 
 	if (pdev != NULL)
 		free_pdev(pdev);
-
-	return 0;
 }
 
 static const struct xenbus_device_id xen_pcibk_ids[] = {
diff --git a/drivers/xen/xen-scsiback.c b/drivers/xen/xen-scsiback.c
index 6106ed93817d..954188b0b858 100644
--- a/drivers/xen/xen-scsiback.c
+++ b/drivers/xen/xen-scsiback.c
@@ -1249,7 +1249,7 @@ static void scsiback_release_translation_entry(struct vscsibk_info *info)
 	spin_unlock_irqrestore(&info->v2p_lock, flags);
 }
 
-static int scsiback_remove(struct xenbus_device *dev)
+static void scsiback_remove(struct xenbus_device *dev)
 {
 	struct vscsibk_info *info = dev_get_drvdata(&dev->dev);
 
@@ -1261,8 +1261,6 @@ static int scsiback_remove(struct xenbus_device *dev)
 	gnttab_page_cache_shrink(&info->free_pages, 0);
 
 	dev_set_drvdata(&dev->dev, NULL);
-
-	return 0;
 }
 
 static int scsiback_probe(struct xenbus_device *dev,
diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
index eaa932b99d8a..ad4fb4eab753 100644
--- a/include/xen/xenbus.h
+++ b/include/xen/xenbus.h
@@ -117,7 +117,7 @@ struct xenbus_driver {
 		     const struct xenbus_device_id *id);
 	void (*otherend_changed)(struct xenbus_device *dev,
 				 enum xenbus_state backend_state);
-	int (*remove)(struct xenbus_device *dev);
+	void (*remove)(struct xenbus_device *dev);
 	int (*suspend)(struct xenbus_device *dev);
 	int (*resume)(struct xenbus_device *dev);
 	int (*uevent)(struct xenbus_device *, struct kobj_uevent_env *);
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index aaa5fd364691..9950e1a5acb7 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -303,13 +303,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
 	kfree(priv);
 }
 
-static int xen_9pfs_front_remove(struct xenbus_device *dev)
+static void xen_9pfs_front_remove(struct xenbus_device *dev)
 {
 	struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
 
 	dev_set_drvdata(&dev->dev, NULL);
 	xen_9pfs_front_free(priv);
-	return 0;
 }
 
 static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
diff --git a/sound/xen/xen_snd_front.c b/sound/xen/xen_snd_front.c
index 4041748c12e5..b66e037710d0 100644
--- a/sound/xen/xen_snd_front.c
+++ b/sound/xen/xen_snd_front.c
@@ -311,7 +311,7 @@ static int xen_drv_probe(struct xenbus_device *xb_dev,
 	return xenbus_switch_state(xb_dev, XenbusStateInitialising);
 }
 
-static int xen_drv_remove(struct xenbus_device *dev)
+static void xen_drv_remove(struct xenbus_device *dev)
 {
 	struct xen_snd_front_info *front_info = dev_get_drvdata(&dev->dev);
 	int to = 100;
@@ -345,7 +345,6 @@ static int xen_drv_remove(struct xenbus_device *dev)
 
 	xen_snd_drv_fini(front_info);
 	xenbus_frontend_closed(dev);
-	return 0;
 }
 
 static const struct xenbus_device_id xen_drv_ids[] = {
-- 
cgit v1.3.1


From 0a3212de8ab3e2ce5808c6265855e528d4a6767b Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naohiro.aota@wdc.com>
Date: Wed, 14 Dec 2022 11:06:07 +0900
Subject: btrfs: fix trace event name typo for FLUSH_DELAYED_REFS

Fix a typo of printing FLUSH_DELAYED_REFS event in flush_space() as
FLUSH_ELAYED_REFS.

Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
---
 include/trace/events/btrfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h
index 0bce0b4ff2fa..6548b5b5aa60 100644
--- a/include/trace/events/btrfs.h
+++ b/include/trace/events/btrfs.h
@@ -98,7 +98,7 @@ struct raid56_bio_trace_info;
 	EM( FLUSH_DELALLOC_WAIT,	"FLUSH_DELALLOC_WAIT")		\
 	EM( FLUSH_DELALLOC_FULL,	"FLUSH_DELALLOC_FULL")		\
 	EM( FLUSH_DELAYED_REFS_NR,	"FLUSH_DELAYED_REFS_NR")	\
-	EM( FLUSH_DELAYED_REFS,		"FLUSH_ELAYED_REFS")		\
+	EM( FLUSH_DELAYED_REFS,		"FLUSH_DELAYED_REFS")		\
 	EM( ALLOC_CHUNK,		"ALLOC_CHUNK")			\
 	EM( ALLOC_CHUNK_FORCE,		"ALLOC_CHUNK_FORCE")		\
 	EM( RUN_DELAYED_IPUTS,		"RUN_DELAYED_IPUTS")		\
-- 
cgit v1.3.1


From b18cba09e374637a0a3759d856a6bca94c133952 Mon Sep 17 00:00:00 2001
From: minoura makoto <minoura@valinux.co.jp>
Date: Tue, 13 Dec 2022 13:14:31 +0900
Subject: SUNRPC: ensure the matching upcall is in-flight upon downcall

Commit 9130b8dbc6ac ("SUNRPC: allow for upcalls for the same uid
but different gss service") introduced `auth` argument to
__gss_find_upcall(), but in gss_pipe_downcall() it was left as NULL
since it (and auth->service) was not (yet) determined.

When multiple upcalls with the same uid and different service are
ongoing, it could happen that __gss_find_upcall(), which returns the
first match found in the pipe->in_downcall list, could not find the
correct gss_msg corresponding to the downcall we are looking for.
Moreover, it might return a msg which is not sent to rpc.gssd yet.

We could see mount.nfs process hung in D state with multiple mount.nfs
are executed in parallel.  The call trace below is of CentOS 7.9
kernel-3.10.0-1160.24.1.el7.x86_64 but we observed the same hang w/
elrepo kernel-ml-6.0.7-1.el7.

PID: 71258  TASK: ffff91ebd4be0000  CPU: 36  COMMAND: "mount.nfs"
 #0 [ffff9203ca3234f8] __schedule at ffffffffa3b8899f
 #1 [ffff9203ca323580] schedule at ffffffffa3b88eb9
 #2 [ffff9203ca323590] gss_cred_init at ffffffffc0355818 [auth_rpcgss]
 #3 [ffff9203ca323658] rpcauth_lookup_credcache at ffffffffc0421ebc
[sunrpc]
 #4 [ffff9203ca3236d8] gss_lookup_cred at ffffffffc0353633 [auth_rpcgss]
 #5 [ffff9203ca3236e8] rpcauth_lookupcred at ffffffffc0421581 [sunrpc]
 #6 [ffff9203ca323740] rpcauth_refreshcred at ffffffffc04223d3 [sunrpc]
 #7 [ffff9203ca3237a0] call_refresh at ffffffffc04103dc [sunrpc]
 #8 [ffff9203ca3237b8] __rpc_execute at ffffffffc041e1c9 [sunrpc]
 #9 [ffff9203ca323820] rpc_execute at ffffffffc0420a48 [sunrpc]

The scenario is like this. Let's say there are two upcalls for
services A and B, A -> B in pipe->in_downcall, B -> A in pipe->pipe.

When rpc.gssd reads pipe to get the upcall msg corresponding to
service B from pipe->pipe and then writes the response, in
gss_pipe_downcall the msg corresponding to service A will be picked
because only uid is used to find the msg and it is before the one for
B in pipe->in_downcall.  And the process waiting for the msg
corresponding to service A will be woken up.

Actual scheduing of that process might be after rpc.gssd processes the
next msg.  In rpc_pipe_generic_upcall it clears msg->errno (for A).
The process is scheduled to see gss_msg->ctx == NULL and
gss_msg->msg.errno == 0, therefore it cannot break the loop in
gss_create_upcall and is never woken up after that.

This patch adds a simple check to ensure that a msg which is not
sent to rpc.gssd yet is not chosen as the matching upcall upon
receiving a downcall.

Signed-off-by: minoura makoto <minoura@valinux.co.jp>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Tested-by: Hiroshi Shimamoto <h-shimamoto@nec.com>
Cc: Trond Myklebust <trondmy@hammerspace.com>
Fixes: 9130b8dbc6ac ("SUNRPC: allow for upcalls for same uid but different gss service")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/rpc_pipe_fs.h |  5 +++++
 net/sunrpc/auth_gss/auth_gss.c     | 19 +++++++++++++++++--
 2 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/rpc_pipe_fs.h b/include/linux/sunrpc/rpc_pipe_fs.h
index cd188a527d16..3b35b6f6533a 100644
--- a/include/linux/sunrpc/rpc_pipe_fs.h
+++ b/include/linux/sunrpc/rpc_pipe_fs.h
@@ -92,6 +92,11 @@ extern ssize_t rpc_pipe_generic_upcall(struct file *, struct rpc_pipe_msg *,
 				       char __user *, size_t);
 extern int rpc_queue_upcall(struct rpc_pipe *, struct rpc_pipe_msg *);
 
+/* returns true if the msg is in-flight, i.e., already eaten by the peer */
+static inline bool rpc_msg_is_inflight(const struct rpc_pipe_msg *msg) {
+	return (msg->copied != 0 && list_empty(&msg->list));
+}
+
 struct rpc_clnt;
 extern struct dentry *rpc_create_client_dir(struct dentry *, const char *, struct rpc_clnt *);
 extern int rpc_remove_client_dir(struct rpc_clnt *);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 7bb247c51e2f..2d7b1e03110a 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -302,7 +302,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
 	list_for_each_entry(pos, &pipe->in_downcall, list) {
 		if (!uid_eq(pos->uid, uid))
 			continue;
-		if (auth && pos->auth->service != auth->service)
+		if (pos->auth->service != auth->service)
 			continue;
 		refcount_inc(&pos->count);
 		return pos;
@@ -686,6 +686,21 @@ out:
 	return err;
 }
 
+static struct gss_upcall_msg *
+gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
+{
+	struct gss_upcall_msg *pos;
+	list_for_each_entry(pos, &pipe->in_downcall, list) {
+		if (!uid_eq(pos->uid, uid))
+			continue;
+		if (!rpc_msg_is_inflight(&pos->msg))
+			continue;
+		refcount_inc(&pos->count);
+		return pos;
+	}
+	return NULL;
+}
+
 #define MSG_BUF_MAXSIZE 1024
 
 static ssize_t
@@ -732,7 +747,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	err = -ENOENT;
 	/* Find a matching upcall */
 	spin_lock(&pipe->lock);
-	gss_msg = __gss_find_upcall(pipe, uid, NULL);
+	gss_msg = gss_find_downcall(pipe, uid);
 	if (gss_msg == NULL) {
 		spin_unlock(&pipe->lock);
 		goto err_put_ctx;
-- 
cgit v1.3.1


From 4e699e34f923188175986ad8a74ab99f7034075e Mon Sep 17 00:00:00 2001
From: Ma Jun <majun@amd.com>
Date: Fri, 16 Dec 2022 11:05:26 +0800
Subject: drm/plane-helper: Add the missing declaration of drm_atomic_state

Add the missing declaration of struct drm_atomic_state to fix the
compile error below:

error: 'struct drm_atomic_state' declared inside parameter
list will not be visible outside of this definition or declaration [-Werror]

Signed-off-by: Ma Jun <majun@amd.com>
Reviewed-by: Thomas Zimmermann <tzimmermann@suse.de>
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Fixes: 8401bd361f59 ("drm/plane-helper: Add a drm_plane_helper_atomic_check() helper")
Cc: Javier Martinez Canillas <javierm@redhat.com>
Cc: Thomas Zimmermann <tzimmermann@suse.de>
Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Cc: Maxime Ripard <mripard@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: dri-devel@lists.freedesktop.org
Cc: <stable@vger.kernel.org> # v6.1+
Link: https://patchwork.freedesktop.org/patch/msgid/20221216030526.1335609-1-majun@amd.com
---
 include/drm/drm_plane_helper.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/drm/drm_plane_helper.h b/include/drm/drm_plane_helper.h
index ff83d2621687..3a574e8cd22f 100644
--- a/include/drm/drm_plane_helper.h
+++ b/include/drm/drm_plane_helper.h
@@ -26,6 +26,7 @@
 
 #include <linux/types.h>
 
+struct drm_atomic_state;
 struct drm_crtc;
 struct drm_framebuffer;
 struct drm_modeset_acquire_ctx;
-- 
cgit v1.3.1


From bed4a63ea4ae77cfe5aae004ef87379f0655260a Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 19 Dec 2022 20:07:52 +0100
Subject: netfilter: nf_tables: consolidate set description

Add the following fields to the set description:

- key type
- data type
- object type
- policy
- gc_int: garbage collection interval)
- timeout: element timeout

This prepares for stricter set type checks on updates in a follow up
patch.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 12 ++++++++
 net/netfilter/nf_tables_api.c     | 58 +++++++++++++++++++--------------------
 2 files changed, 40 insertions(+), 30 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e69ce23566ea..4957b4775757 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -312,17 +312,29 @@ struct nft_set_iter {
 /**
  *	struct nft_set_desc - description of set elements
  *
+ *	@ktype: key type
  *	@klen: key length
+ *	@dtype: data type
  *	@dlen: data length
+ *	@objtype: object type
+ *	@flags: flags
  *	@size: number of set elements
+ *	@policy: set policy
+ *	@gc_int: garbage collector interval
  *	@field_len: length of each field in concatenation, bytes
  *	@field_count: number of concatenated fields in element
  *	@expr: set must support for expressions
  */
 struct nft_set_desc {
+	u32			ktype;
 	unsigned int		klen;
+	u32			dtype;
 	unsigned int		dlen;
+	u32			objtype;
 	unsigned int		size;
+	u32			policy;
+	u32			gc_int;
+	u64			timeout;
 	u8			field_len[NFT_REG32_COUNT];
 	u8			field_count;
 	bool			expr;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 832b881f7c17..1deecc1a6c00 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3780,8 +3780,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
 static const struct nft_set_ops *
 nft_select_set_ops(const struct nft_ctx *ctx,
 		   const struct nlattr * const nla[],
-		   const struct nft_set_desc *desc,
-		   enum nft_set_policies policy)
+		   const struct nft_set_desc *desc)
 {
 	struct nftables_pernet *nft_net = nft_pernet(ctx->net);
 	const struct nft_set_ops *ops, *bops;
@@ -3810,7 +3809,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
 		if (!ops->estimate(desc, flags, &est))
 			continue;
 
-		switch (policy) {
+		switch (desc->policy) {
 		case NFT_SET_POL_PERFORMANCE:
 			if (est.lookup < best.lookup)
 				break;
@@ -4392,7 +4391,6 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
 static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 			    const struct nlattr * const nla[])
 {
-	u32 ktype, dtype, flags, policy, gc_int, objtype;
 	struct netlink_ext_ack *extack = info->extack;
 	u8 genmask = nft_genmask_next(info->net);
 	u8 family = info->nfmsg->nfgen_family;
@@ -4405,10 +4403,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	struct nft_set *set;
 	struct nft_ctx ctx;
 	size_t alloc_size;
-	u64 timeout;
 	char *name;
 	int err, i;
 	u16 udlen;
+	u32 flags;
 	u64 size;
 
 	if (nla[NFTA_SET_TABLE] == NULL ||
@@ -4419,10 +4417,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 
 	memset(&desc, 0, sizeof(desc));
 
-	ktype = NFT_DATA_VALUE;
+	desc.ktype = NFT_DATA_VALUE;
 	if (nla[NFTA_SET_KEY_TYPE] != NULL) {
-		ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
-		if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+		desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+		if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
 			return -EINVAL;
 	}
 
@@ -4447,17 +4445,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 			return -EOPNOTSUPP;
 	}
 
-	dtype = 0;
+	desc.dtype = 0;
 	if (nla[NFTA_SET_DATA_TYPE] != NULL) {
 		if (!(flags & NFT_SET_MAP))
 			return -EINVAL;
 
-		dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
-		if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
-		    dtype != NFT_DATA_VERDICT)
+		desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+		if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+		    desc.dtype != NFT_DATA_VERDICT)
 			return -EINVAL;
 
-		if (dtype != NFT_DATA_VERDICT) {
+		if (desc.dtype != NFT_DATA_VERDICT) {
 			if (nla[NFTA_SET_DATA_LEN] == NULL)
 				return -EINVAL;
 			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
@@ -4472,34 +4470,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 		if (!(flags & NFT_SET_OBJECT))
 			return -EINVAL;
 
-		objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
-		if (objtype == NFT_OBJECT_UNSPEC ||
-		    objtype > NFT_OBJECT_MAX)
+		desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+		if (desc.objtype == NFT_OBJECT_UNSPEC ||
+		    desc.objtype > NFT_OBJECT_MAX)
 			return -EOPNOTSUPP;
 	} else if (flags & NFT_SET_OBJECT)
 		return -EINVAL;
 	else
-		objtype = NFT_OBJECT_UNSPEC;
+		desc.objtype = NFT_OBJECT_UNSPEC;
 
-	timeout = 0;
+	desc.timeout = 0;
 	if (nla[NFTA_SET_TIMEOUT] != NULL) {
 		if (!(flags & NFT_SET_TIMEOUT))
 			return -EINVAL;
 
-		err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+		err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
 		if (err)
 			return err;
 	}
-	gc_int = 0;
+	desc.gc_int = 0;
 	if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
 		if (!(flags & NFT_SET_TIMEOUT))
 			return -EINVAL;
-		gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+		desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
 	}
 
-	policy = NFT_SET_POL_PERFORMANCE;
+	desc.policy = NFT_SET_POL_PERFORMANCE;
 	if (nla[NFTA_SET_POLICY] != NULL)
-		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+		desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
 
 	if (nla[NFTA_SET_DESC] != NULL) {
 		err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
@@ -4544,7 +4542,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
 		return -ENOENT;
 
-	ops = nft_select_set_ops(&ctx, nla, &desc, policy);
+	ops = nft_select_set_ops(&ctx, nla, &desc);
 	if (IS_ERR(ops))
 		return PTR_ERR(ops);
 
@@ -4584,18 +4582,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 	set->table = table;
 	write_pnet(&set->net, net);
 	set->ops = ops;
-	set->ktype = ktype;
+	set->ktype = desc.ktype;
 	set->klen = desc.klen;
-	set->dtype = dtype;
-	set->objtype = objtype;
+	set->dtype = desc.dtype;
+	set->objtype = desc.objtype;
 	set->dlen = desc.dlen;
 	set->flags = flags;
 	set->size = desc.size;
-	set->policy = policy;
+	set->policy = desc.policy;
 	set->udlen = udlen;
 	set->udata = udata;
-	set->timeout = timeout;
-	set->gc_int = gc_int;
+	set->timeout = desc.timeout;
+	set->gc_int = desc.gc_int;
 
 	set->field_count = desc.field_count;
 	for (i = 0; i < desc.field_count; i++)
-- 
cgit v1.3.1


From 123b99619cca94bdca0bf7bde9abe28f0a0dfe06 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Mon, 19 Dec 2022 20:10:12 +0100
Subject: netfilter: nf_tables: honor set timeout and garbage collection
 updates

Set timeout and garbage collection interval updates are ignored on
updates. Add transaction to update global set element timeout and
garbage collection interval.

Fixes: 96518518cc41 ("netfilter: add nftables")
Suggested-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h | 13 +++++++-
 net/netfilter/nf_tables_api.c     | 63 ++++++++++++++++++++++++++++-----------
 2 files changed, 57 insertions(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4957b4775757..9430128aae99 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -597,7 +597,9 @@ void *nft_set_catchall_gc(const struct nft_set *set);
 
 static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
 {
-	return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
+	u32 gc_int = READ_ONCE(set->gc_int);
+
+	return gc_int ? msecs_to_jiffies(gc_int) : HZ;
 }
 
 /**
@@ -1570,6 +1572,9 @@ struct nft_trans_rule {
 struct nft_trans_set {
 	struct nft_set			*set;
 	u32				set_id;
+	u32				gc_int;
+	u64				timeout;
+	bool				update;
 	bool				bound;
 };
 
@@ -1579,6 +1584,12 @@ struct nft_trans_set {
 	(((struct nft_trans_set *)trans->data)->set_id)
 #define nft_trans_set_bound(trans)	\
 	(((struct nft_trans_set *)trans->data)->bound)
+#define nft_trans_set_update(trans)	\
+	(((struct nft_trans_set *)trans->data)->update)
+#define nft_trans_set_timeout(trans)	\
+	(((struct nft_trans_set *)trans->data)->timeout)
+#define nft_trans_set_gc_int(trans)	\
+	(((struct nft_trans_set *)trans->data)->gc_int)
 
 struct nft_trans_chain {
 	bool				update;
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 319887f4d3ef..8c09e4d12ac1 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
 	return 0;
 }
 
-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
-			     struct nft_set *set)
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+			       struct nft_set *set,
+			       const struct nft_set_desc *desc)
 {
 	struct nft_trans *trans;
 
@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
 	if (trans == NULL)
 		return -ENOMEM;
 
-	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+	if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
 		nft_trans_set_id(trans) =
 			ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
 		nft_activate_next(ctx->net, set);
 	}
 	nft_trans_set(trans) = set;
+	if (desc) {
+		nft_trans_set_update(trans) = true;
+		nft_trans_set_gc_int(trans) = desc->gc_int;
+		nft_trans_set_timeout(trans) = desc->timeout;
+	}
 	nft_trans_commit_list_add_tail(ctx->net, trans);
 
 	return 0;
 }
 
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+			     struct nft_set *set)
+{
+	return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
 static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
 {
 	int err;
@@ -4044,8 +4056,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
 static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 			      const struct nft_set *set, u16 event, u16 flags)
 {
-	struct nlmsghdr *nlh;
+	u64 timeout = READ_ONCE(set->timeout);
+	u32 gc_int = READ_ONCE(set->gc_int);
 	u32 portid = ctx->portid;
+	struct nlmsghdr *nlh;
 	struct nlattr *nest;
 	u32 seq = ctx->seq;
 	int i;
@@ -4081,13 +4095,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 	    nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
 		goto nla_put_failure;
 
-	if (set->timeout &&
+	if (timeout &&
 	    nla_put_be64(skb, NFTA_SET_TIMEOUT,
-			 nf_jiffies64_to_msecs(set->timeout),
+			 nf_jiffies64_to_msecs(timeout),
 			 NFTA_SET_PAD))
 		goto nla_put_failure;
-	if (set->gc_int &&
-	    nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+	if (gc_int &&
+	    nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
 		goto nla_put_failure;
 
 	if (set->policy != NFT_SET_POL_PERFORMANCE) {
@@ -4632,7 +4646,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
 		for (i = 0; i < num_exprs; i++)
 			nft_expr_destroy(&ctx, exprs[i]);
 
-		return err;
+		if (err < 0)
+			return err;
+
+		return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
 	}
 
 	if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
@@ -6070,7 +6087,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			return err;
 	} else if (set->flags & NFT_SET_TIMEOUT &&
 		   !(flags & NFT_SET_ELEM_INTERVAL_END)) {
-		timeout = set->timeout;
+		timeout = READ_ONCE(set->timeout);
 	}
 
 	expiration = 0;
@@ -6171,7 +6188,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		if (err < 0)
 			goto err_parse_key_end;
 
-		if (timeout != set->timeout) {
+		if (timeout != READ_ONCE(set->timeout)) {
 			err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
 			if (err < 0)
 				goto err_parse_key_end;
@@ -9093,14 +9110,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 				nft_flow_rule_destroy(nft_trans_flow_rule(trans));
 			break;
 		case NFT_MSG_NEWSET:
-			nft_clear(net, nft_trans_set(trans));
-			/* This avoids hitting -EBUSY when deleting the table
-			 * from the transaction.
-			 */
-			if (nft_set_is_anonymous(nft_trans_set(trans)) &&
-			    !list_empty(&nft_trans_set(trans)->bindings))
-				trans->ctx.table->use--;
+			if (nft_trans_set_update(trans)) {
+				struct nft_set *set = nft_trans_set(trans);
 
+				WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+				WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+			} else {
+				nft_clear(net, nft_trans_set(trans));
+				/* This avoids hitting -EBUSY when deleting the table
+				 * from the transaction.
+				 */
+				if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+				    !list_empty(&nft_trans_set(trans)->bindings))
+					trans->ctx.table->use--;
+			}
 			nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
 					     NFT_MSG_NEWSET, GFP_KERNEL);
 			nft_trans_destroy(trans);
@@ -9322,6 +9345,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWSET:
+			if (nft_trans_set_update(trans)) {
+				nft_trans_destroy(trans);
+				break;
+			}
 			trans->ctx.table->use--;
 			if (nft_trans_set_bound(trans)) {
 				nft_trans_destroy(trans);
-- 
cgit v1.3.1


From 00a734104af7d878f1252d49eff9298785c6cbdc Mon Sep 17 00:00:00 2001
From: Mario Limonciello <mario.limonciello@amd.com>
Date: Thu, 8 Dec 2022 10:42:05 -0600
Subject: ACPI: video: Allow GPU drivers to report no panels

The current logic for the ACPI backlight detection will create
a backlight device if no native or vendor drivers have created
8 seconds after the system has booted if the ACPI tables
included backlight control methods.

If the GPU drivers have loaded, they may be able to report whether
any LCD panels were found.  Allow using this information to factor
in whether to enable the fallback logic for making an acpi_video0
backlight device.

Suggested-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_video.c | 11 +++++++++++
 include/acpi/video.h      |  2 ++
 2 files changed, 13 insertions(+)

(limited to 'include')

diff --git a/drivers/acpi/acpi_video.c b/drivers/acpi/acpi_video.c
index 30d8fd03fec7..75dc37affff2 100644
--- a/drivers/acpi/acpi_video.c
+++ b/drivers/acpi/acpi_video.c
@@ -2176,6 +2176,17 @@ static bool should_check_lcd_flag(void)
 	return false;
 }
 
+/*
+ * At least one graphics driver has reported that no LCD is connected
+ * via the native interface. cancel the registration for fallback acpi_video0.
+ * If another driver still deems this necessary, it can explicitly register it.
+ */
+void acpi_video_report_nolcd(void)
+{
+	cancel_delayed_work(&video_bus_register_backlight_work);
+}
+EXPORT_SYMBOL(acpi_video_report_nolcd);
+
 int acpi_video_register(void)
 {
 	int ret = 0;
diff --git a/include/acpi/video.h b/include/acpi/video.h
index a275c35e5249..8ed9bec03e53 100644
--- a/include/acpi/video.h
+++ b/include/acpi/video.h
@@ -53,6 +53,7 @@ enum acpi_backlight_type {
 };
 
 #if IS_ENABLED(CONFIG_ACPI_VIDEO)
+extern void acpi_video_report_nolcd(void);
 extern int acpi_video_register(void);
 extern void acpi_video_unregister(void);
 extern void acpi_video_register_backlight(void);
@@ -69,6 +70,7 @@ extern int acpi_video_get_levels(struct acpi_device *device,
 				 struct acpi_video_device_brightness **dev_br,
 				 int *pmax_level);
 #else
+static inline void acpi_video_report_nolcd(void) { return; };
 static inline int acpi_video_register(void) { return -ENODEV; }
 static inline void acpi_video_unregister(void) { return; }
 static inline void acpi_video_register_backlight(void) { return; }
-- 
cgit v1.3.1


From 7fac54b93ad13e5e7ac237af33eb2a0940eaeea0 Mon Sep 17 00:00:00 2001
From: Rong Tao <rongtao@cestc.cn>
Date: Wed, 21 Dec 2022 20:36:27 +0800
Subject: atm: uapi: fix spelling typos in comments

Fix the typo of 'Unsuported' in atmbr2684.h

Signed-off-by: Rong Tao <rongtao@cestc.cn>
Link: https://lore.kernel.org/r/tencent_F1354BEC925C65EA357E741E91DF2044E805@qq.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/atmbr2684.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/atmbr2684.h b/include/uapi/linux/atmbr2684.h
index a9e2250cd720..d47c47d06f11 100644
--- a/include/uapi/linux/atmbr2684.h
+++ b/include/uapi/linux/atmbr2684.h
@@ -38,7 +38,7 @@
  */
 #define BR2684_ENCAPS_VC	(0)	/* VC-mux */
 #define BR2684_ENCAPS_LLC	(1)
-#define BR2684_ENCAPS_AUTODETECT (2)	/* Unsuported */
+#define BR2684_ENCAPS_AUTODETECT (2)	/* Unsupported */
 
 /*
  * Is this VC bridged or routed?
-- 
cgit v1.3.1


From b0305c1e0e27ad91187bc6d5ac3d502799faf239 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw@amazon.co.uk>
Date: Mon, 26 Dec 2022 12:03:19 +0000
Subject: KVM: x86/xen: Add KVM_XEN_INVALID_GPA and KVM_XEN_INVALID_GFN to uapi

These are (uint64_t)-1 magic values are a userspace ABI, allowing the
shared info pages and other enlightenments to be disabled. This isn't
a Xen ABI because Xen doesn't let the guest turn these off except with
the full SHUTDOWN_soft_reset mechanism. Under KVM, the userspace VMM is
expected to handle soft reset, and tear down the kernel parts of the
enlightenments accordingly.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <20221226120320.1125390-5-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/xen.c       | 14 +++++++-------
 include/uapi/linux/kvm.h |  3 +++
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index bddbe5ac5cfa..b178f40bd863 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -41,7 +41,7 @@ static int kvm_xen_shared_info_init(struct kvm *kvm, gfn_t gfn)
 	int ret = 0;
 	int idx = srcu_read_lock(&kvm->srcu);
 
-	if (gfn == GPA_INVALID) {
+	if (gfn == KVM_XEN_INVALID_GFN) {
 		kvm_gpc_deactivate(gpc);
 		goto out;
 	}
@@ -659,7 +659,7 @@ int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data)
 		if (kvm->arch.xen.shinfo_cache.active)
 			data->u.shared_info.gfn = gpa_to_gfn(kvm->arch.xen.shinfo_cache.gpa);
 		else
-			data->u.shared_info.gfn = GPA_INVALID;
+			data->u.shared_info.gfn = KVM_XEN_INVALID_GFN;
 		r = 0;
 		break;
 
@@ -705,7 +705,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
 			     offsetof(struct compat_vcpu_info, time));
 
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_info_cache);
 			r = 0;
 			break;
@@ -719,7 +719,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		break;
 
 	case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			kvm_gpc_deactivate(&vcpu->arch.xen.vcpu_time_info_cache);
 			r = 0;
 			break;
@@ -739,7 +739,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 			r = -EOPNOTSUPP;
 			break;
 		}
-		if (data->u.gpa == GPA_INVALID) {
+		if (data->u.gpa == KVM_XEN_INVALID_GPA) {
 			r = 0;
 		deactivate_out:
 			kvm_gpc_deactivate(&vcpu->arch.xen.runstate_cache);
@@ -937,7 +937,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		if (vcpu->arch.xen.vcpu_info_cache.active)
 			data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
 		else
-			data->u.gpa = GPA_INVALID;
+			data->u.gpa = KVM_XEN_INVALID_GPA;
 		r = 0;
 		break;
 
@@ -945,7 +945,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 		if (vcpu->arch.xen.vcpu_time_info_cache.active)
 			data->u.gpa = vcpu->arch.xen.vcpu_time_info_cache.gpa;
 		else
-			data->u.gpa = GPA_INVALID;
+			data->u.gpa = KVM_XEN_INVALID_GPA;
 		r = 0;
 		break;
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 20522d4ba1e0..55155e262646 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1767,6 +1767,7 @@ struct kvm_xen_hvm_attr {
 		__u8 runstate_update_flag;
 		struct {
 			__u64 gfn;
+#define KVM_XEN_INVALID_GFN ((__u64)-1)
 		} shared_info;
 		struct {
 			__u32 send_port;
@@ -1798,6 +1799,7 @@ struct kvm_xen_hvm_attr {
 	} u;
 };
 
+
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_ATTR_TYPE_LONG_MODE		0x0
 #define KVM_XEN_ATTR_TYPE_SHARED_INFO		0x1
@@ -1823,6 +1825,7 @@ struct kvm_xen_vcpu_attr {
 	__u16 pad[3];
 	union {
 		__u64 gpa;
+#define KVM_XEN_INVALID_GPA ((__u64)-1)
 		__u64 pad[8];
 		struct {
 			__u64 state;
-- 
cgit v1.3.1


From 9eb803402a2a83400c6c6afd900e3b7c87c06816 Mon Sep 17 00:00:00 2001
From: Stefan Metzmacher <metze@samba.org>
Date: Wed, 16 Nov 2022 21:25:24 +0100
Subject: uapi:io_uring.h: allow linux/time_types.h to be skipped

include/uapi/linux/io_uring.h is synced 1:1 into
liburing:src/include/liburing/io_uring.h.

liburing has a configure check to detect the need for
linux/time_types.h. It can opt-out by defining
UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H

Fixes: 78a861b94959 ("io_uring: add sync cancelation API through io_uring_register()")
Link: https://github.com/axboe/liburing/issues/708
Link: https://github.com/axboe/liburing/pull/709
Link: https://lore.kernel.org/io-uring/20221115212614.1308132-1-ammar.faizi@intel.com/T/#m9f5dd571cd4f6a5dee84452dbbca3b92ba7a4091
CC: Jens Axboe <axboe@kernel.dk>
Cc: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Signed-off-by: Stefan Metzmacher <metze@samba.org>
Reviewed-by: Ammar Faizi <ammarfaizi2@gnuweeb.org>
Link: https://lore.kernel.org/r/7071a0a1d751221538b20b63f9160094fc7e06f4.1668630247.git.metze@samba.org
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/uapi/linux/io_uring.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h
index 9d4c4078e8d0..2780bce62faf 100644
--- a/include/uapi/linux/io_uring.h
+++ b/include/uapi/linux/io_uring.h
@@ -10,7 +10,15 @@
 
 #include <linux/fs.h>
 #include <linux/types.h>
+/*
+ * this file is shared with liburing and that has to autodetect
+ * if linux/time_types.h is available or not, it can
+ * define UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
+ * if linux/time_types.h is not available
+ */
+#ifndef UAPI_LINUX_IO_URING_H_SKIP_LINUX_TIME_TYPES_H
 #include <linux/time_types.h>
+#endif
 
 #ifdef __cplusplus
 extern "C" {
-- 
cgit v1.3.1


From 0e50d999903c009b6a9cd2277c82d6798d982e31 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Sat, 24 Dec 2022 14:49:00 +0000
Subject: rxrpc: Fix a couple of potential use-after-frees

At the end of rxrpc_recvmsg(), if a call is found, the call is put and then
a trace line is emitted referencing that call in a couple of places - but
the call may have been deallocated by the time those traces happen.

Fix this by stashing the call debug_id in a variable and passing that to
the tracepoint rather than the call pointer.

Fixes: 849979051cbc ("rxrpc: Add a tracepoint to follow what recvmsg does")
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/rxrpc.h |  6 +++---
 net/rxrpc/recvmsg.c          | 14 ++++++++------
 2 files changed, 11 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index c6cfed00d0c6..5f9dd7389536 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -1062,10 +1062,10 @@ TRACE_EVENT(rxrpc_receive,
 	    );
 
 TRACE_EVENT(rxrpc_recvmsg,
-	    TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why,
+	    TP_PROTO(unsigned int call_debug_id, enum rxrpc_recvmsg_trace why,
 		     int ret),
 
-	    TP_ARGS(call, why, ret),
+	    TP_ARGS(call_debug_id, why, ret),
 
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		call		)
@@ -1074,7 +1074,7 @@ TRACE_EVENT(rxrpc_recvmsg,
 			     ),
 
 	    TP_fast_assign(
-		    __entry->call = call ? call->debug_id : 0;
+		    __entry->call = call_debug_id;
 		    __entry->why = why;
 		    __entry->ret = ret;
 			   ),
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 36b25d003cf0..6ebd6440a2b7 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -388,13 +388,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 	struct rxrpc_call *call;
 	struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
 	struct list_head *l;
+	unsigned int call_debug_id = 0;
 	size_t copied = 0;
 	long timeo;
 	int ret;
 
 	DEFINE_WAIT(wait);
 
-	trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0);
+	trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);
 
 	if (flags & (MSG_OOB | MSG_TRUNC))
 		return -EOPNOTSUPP;
@@ -431,7 +432,7 @@ try_again:
 		if (list_empty(&rx->recvmsg_q)) {
 			if (signal_pending(current))
 				goto wait_interrupted;
-			trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0);
+			trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
 			timeo = schedule_timeout(timeo);
 		}
 		finish_wait(sk_sleep(&rx->sk), &wait);
@@ -450,7 +451,8 @@ try_again:
 		rxrpc_get_call(call, rxrpc_call_get_recvmsg);
 	write_unlock(&rx->recvmsg_lock);
 
-	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
+	call_debug_id = call->debug_id;
+	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
 
 	/* We're going to drop the socket lock, so we need to lock the call
 	 * against interference by sendmsg.
@@ -531,7 +533,7 @@ try_again:
 error_unlock_call:
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put_recvmsg);
-	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
 	return ret;
 
 error_requeue_call:
@@ -539,14 +541,14 @@ error_requeue_call:
 		write_lock(&rx->recvmsg_lock);
 		list_add(&call->recvmsg_link, &rx->recvmsg_q);
 		write_unlock(&rx->recvmsg_lock);
-		trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
+		trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
 	} else {
 		rxrpc_put_call(call, rxrpc_call_put_recvmsg);
 	}
 error_no_call:
 	release_sock(&rx->sk);
 error_trace:
-	trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+	trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
 	return ret;
 
 wait_interrupted:
-- 
cgit v1.3.1


From b9e05399d9273c8c066e73db1e6e85364003030c Mon Sep 17 00:00:00 2001
From: Si-Wei Liu <si-wei.liu@oracle.com>
Date: Mon, 10 Oct 2022 10:27:03 -0700
Subject: vdpa: merge functionally duplicated dev_features attributes

We can merge VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES with
VDPA_ATTR_DEV_FEATURES which is functionally equivalent.
While at it, tweak the comment in header file to make
user provioned device features distinguished from those
supported by the parent mgmtdev device: the former of
which can be inherited as a whole from the latter, or
can be a subset of the latter if explicitly specified.

Signed-off-by: Si-Wei Liu <si-wei.liu@oracle.com>
Message-Id: <1665422823-18364-1-git-send-email-si-wei.liu@oracle.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
---
 drivers/vdpa/vdpa.c       | 2 +-
 include/uapi/linux/vdpa.h | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c
index febdc99b51a7..41ed56362992 100644
--- a/drivers/vdpa/vdpa.c
+++ b/drivers/vdpa/vdpa.c
@@ -855,7 +855,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms
 
 	features_device = vdev->config->get_device_features(vdev);
 
-	if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device,
+	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device,
 			      VDPA_ATTR_PAD))
 		return -EMSGSIZE;
 
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
index 9bd79235c875..54b649ab0f22 100644
--- a/include/uapi/linux/vdpa.h
+++ b/include/uapi/linux/vdpa.h
@@ -53,11 +53,9 @@ enum vdpa_attr {
 	VDPA_ATTR_DEV_VENDOR_ATTR_NAME,		/* string */
 	VDPA_ATTR_DEV_VENDOR_ATTR_VALUE,        /* u64 */
 
+	/* virtio features that are provisioned to the vDPA device */
 	VDPA_ATTR_DEV_FEATURES,                 /* u64 */
 
-	/* virtio features that are supported by the vDPA device */
-	VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES,	/* u64 */
-
 	/* new attributes must be added above here */
 	VDPA_ATTR_MAX,
 };
-- 
cgit v1.3.1


From 40cab44b9089a41f71bbd0eff753eb91d5dafd68 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@mojatatu.com>
Date: Tue, 27 Dec 2022 11:04:59 -0300
Subject: net/sched: fix retpoline wrapper compilation on configs without tc
 filters

Rudi reports a compilation failure on x86_64 when CONFIG_NET_CLS or
CONFIG_NET_CLS_ACT is not set but CONFIG_RETPOLINE is set.
A misplaced '#endif' was causing the issue.

Fixes: 7f0e810220e2 ("net/sched: add retpoline wrapper for tc")

Tested-by: Rudi Heitbaum <rudi@heitbaum.com>
Signed-off-by: Pedro Tammela <pctammela@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_wrapper.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/net/tc_wrapper.h b/include/net/tc_wrapper.h
index ceed2fc089ff..d323fffb839a 100644
--- a/include/net/tc_wrapper.h
+++ b/include/net/tc_wrapper.h
@@ -216,6 +216,8 @@ skip:
 	return tp->classify(skb, tp, res);
 }
 
+#endif /* CONFIG_NET_CLS */
+
 static inline void tc_wrapper_init(void)
 {
 #ifdef CONFIG_X86
@@ -224,8 +226,6 @@ static inline void tc_wrapper_init(void)
 #endif
 }
 
-#endif /* CONFIG_NET_CLS */
-
 #else
 
 #define TC_INDIRECT_SCOPE static
-- 
cgit v1.3.1


From 685e6311637e46f3212439ce2789f8a300e5050f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 21 Dec 2022 10:30:45 +0100
Subject: nvme: fix the NVME_CMD_EFFECTS_CSE_MASK definition

3 << 16 does not generate the correct mask for bits 16, 17 and 18.
Use the GENMASK macro to generate the correct mask instead.

Fixes: 84fef62d135b ("nvme: check admin passthru command effects")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
---
 include/linux/nvme.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d6be2a686100..d1cd53f2b6ab 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -7,6 +7,7 @@
 #ifndef _LINUX_NVME_H
 #define _LINUX_NVME_H
 
+#include <linux/bits.h>
 #include <linux/types.h>
 #include <linux/uuid.h>
 
@@ -639,7 +640,7 @@ enum {
 	NVME_CMD_EFFECTS_NCC		= 1 << 2,
 	NVME_CMD_EFFECTS_NIC		= 1 << 3,
 	NVME_CMD_EFFECTS_CCC		= 1 << 4,
-	NVME_CMD_EFFECTS_CSE_MASK	= 3 << 16,
+	NVME_CMD_EFFECTS_CSE_MASK	= GENMASK(18, 16),
 	NVME_CMD_EFFECTS_UUID_SEL	= 1 << 19,
 };
 
-- 
cgit v1.3.1


From 6f99ac04c469b5d0a180a4ccea99d25d5dc9d21c Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 13 Dec 2022 16:13:38 +0100
Subject: nvme: consult the CSE log page for unprivileged passthrough

Commands like Write Zeros can change the contents of a namespaces without
actually transferring data.  To protect against this, check the Commands
Supported and Effects log is supported by the controller for any
unprivileg command passthrough and refuse unprivileged passthrough if the
command has any effects that can change data or metadata.

Note: While the Commands Support and Effects log page has only been
mandatory since NVMe 2.0, it is widely supported because Windows requires
it for any command passthrough from userspace.

Fixes: e4fbcf32c860 ("nvme: identify-namespace without CAP_SYS_ADMIN")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
---
 drivers/nvme/host/ioctl.c | 28 ++++++++++++++++++++++++----
 include/linux/nvme.h      |  1 +
 2 files changed, 25 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 9ddda571f046..a8639919237e 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -11,6 +11,8 @@
 static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 		fmode_t mode)
 {
+	u32 effects;
+
 	if (capable(CAP_SYS_ADMIN))
 		return true;
 
@@ -43,11 +45,29 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
 	}
 
 	/*
-	 * Only allow I/O commands that transfer data to the controller if the
-	 * special file is open for writing, but always allow I/O commands that
-	 * transfer data from the controller.
+	 * Check if the controller provides a Commands Supported and Effects log
+	 * and marks this command as supported.  If not reject unprivileged
+	 * passthrough.
+	 */
+	effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
+	if (!(effects & NVME_CMD_EFFECTS_CSUPP))
+		return false;
+
+	/*
+	 * Don't allow passthrough for command that have intrusive (or unknown)
+	 * effects.
+	 */
+	if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+			NVME_CMD_EFFECTS_UUID_SEL |
+			NVME_CMD_EFFECTS_SCOPE_MASK))
+		return false;
+
+	/*
+	 * Only allow I/O commands that transfer data to the controller or that
+	 * change the logical block contents if the file descriptor is open for
+	 * writing.
 	 */
-	if (nvme_is_write(c))
+	if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
 		return mode & FMODE_WRITE;
 	return true;
 }
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d1cd53f2b6ab..4fad4aa245fb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -642,6 +642,7 @@ enum {
 	NVME_CMD_EFFECTS_CCC		= 1 << 4,
 	NVME_CMD_EFFECTS_CSE_MASK	= GENMASK(18, 16),
 	NVME_CMD_EFFECTS_UUID_SEL	= 1 << 19,
+	NVME_CMD_EFFECTS_SCOPE_MASK	= GENMASK(31, 20),
 };
 
 struct nvme_effects_log {
-- 
cgit v1.3.1


From 1f0ae22ab470946143485a02cc1cd7e05c0f9120 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 12 Dec 2022 10:42:15 +0200
Subject: net/mlx5: E-Switch, properly handle ingress tagged packets on VST

Fix SRIOV VST mode behavior to insert cvlan when a guest tag is already
present in the frame. Previous VST mode behavior was to drop packets or
override existing tag, depending on the device version.

In this patch we fix this behavior by correctly building the HW steering
rule with a push vlan action, or for older devices we ask the FW to stack
the vlan when a vlan is already present.

Fixes: 07bab9502641 ("net/mlx5: E-Switch, Refactor eswitch ingress acl codes")
Fixes: dfcb1ed3c331 ("net/mlx5: E-Switch, Vport ingress/egress ACLs rules for VST mode")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 .../mellanox/mlx5/core/esw/acl/egress_lgcy.c       |  7 ++++-
 .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c      | 33 ++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 30 ++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  6 ++++
 include/linux/mlx5/device.h                        |  5 ++++
 include/linux/mlx5/mlx5_ifc.h                      |  3 +-
 6 files changed, 68 insertions(+), 16 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
index 60a73990017c..6b4c9ffad95b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -67,6 +67,7 @@ static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
 int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 			      struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	struct mlx5_flow_destination drop_ctr_dst = {};
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_fc *drop_counter = NULL;
@@ -77,6 +78,7 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 	 */
 	int table_size = 2;
 	int dest_num = 0;
+	int actions_flag;
 	int err = 0;
 
 	if (vport->egress.legacy.drop_counter) {
@@ -119,8 +121,11 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
 		  vport->vport, vport->info.vlan, vport->info.qos);
 
 	/* Allowed vlan rule */
+	actions_flag = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	if (vst_mode_steering)
+		actions_flag |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
 	err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
-					 MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+					 actions_flag);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
index b1a5199260f6..093ed86a0acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -139,11 +139,14 @@ static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
 int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 			       struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	struct mlx5_flow_destination drop_ctr_dst = {};
 	struct mlx5_flow_destination *dst = NULL;
 	struct mlx5_flow_act flow_act = {};
 	struct mlx5_flow_spec *spec = NULL;
 	struct mlx5_fc *counter = NULL;
+	bool vst_check_cvlan = false;
+	bool vst_push_cvlan = false;
 	/* The ingress acl table contains 4 groups
 	 * (2 active rules at the same time -
 	 *      1 allow rule from one of the first 3 groups.
@@ -203,7 +206,26 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		goto out;
 	}
 
-	if (vport->info.vlan || vport->info.qos)
+	if ((vport->info.vlan || vport->info.qos)) {
+		if (vst_mode_steering)
+			vst_push_cvlan = true;
+		else if (!MLX5_CAP_ESW(esw->dev, vport_cvlan_insert_always))
+			vst_check_cvlan = true;
+	}
+
+	if (vst_check_cvlan || vport->info.spoofchk)
+		spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+
+	/* Create ingress allow rule */
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	if (vst_push_cvlan) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH;
+		flow_act.vlan[0].prio = vport->info.qos;
+		flow_act.vlan[0].vid = vport->info.vlan;
+		flow_act.vlan[0].ethtype = ETH_P_8021Q;
+	}
+
+	if (vst_check_cvlan)
 		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria,
 				 outer_headers.cvlan_tag);
 
@@ -218,9 +240,6 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		ether_addr_copy(smac_v, vport->info.mac);
 	}
 
-	/* Create ingress allow rule */
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
 							&flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.allow_rule)) {
@@ -232,6 +251,9 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 		goto out;
 	}
 
+	if (!vst_check_cvlan && !vport->info.spoofchk)
+		goto out;
+
 	memset(&flow_act, 0, sizeof(flow_act));
 	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	/* Attach drop flow counter */
@@ -257,7 +279,8 @@ int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw,
 	return 0;
 
 out:
-	esw_acl_ingress_lgcy_cleanup(esw, vport);
+	if (err)
+		esw_acl_ingress_lgcy_cleanup(esw, vport);
 	kvfree(spec);
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 527e4bffda8d..0dfd5742c6fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -161,10 +161,17 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 			 esw_vport_context.vport_cvlan_strip, 1);
 
 	if (set_flags & SET_VLAN_INSERT) {
-		/* insert only if no vlan in packet */
-		MLX5_SET(modify_esw_vport_context_in, in,
-			 esw_vport_context.vport_cvlan_insert, 1);
-
+		if (MLX5_CAP_ESW(dev, vport_cvlan_insert_always)) {
+			/* insert either if vlan exist in packet or not */
+			MLX5_SET(modify_esw_vport_context_in, in,
+				 esw_vport_context.vport_cvlan_insert,
+				 MLX5_VPORT_CVLAN_INSERT_ALWAYS);
+		} else {
+			/* insert only if no vlan in packet */
+			MLX5_SET(modify_esw_vport_context_in, in,
+				 esw_vport_context.vport_cvlan_insert,
+				 MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN);
+		}
 		MLX5_SET(modify_esw_vport_context_in, in,
 			 esw_vport_context.cvlan_pcp, qos);
 		MLX5_SET(modify_esw_vport_context_in, in,
@@ -809,6 +816,7 @@ out_free:
 
 static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 {
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	u16 vport_num = vport->vport;
 	int flags;
 	int err;
@@ -839,8 +847,9 @@ static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 
 	flags = (vport->info.vlan || vport->info.qos) ?
 		SET_VLAN_STRIP | SET_VLAN_INSERT : 0;
-	modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
-			       vport->info.qos, flags);
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering)
+		modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan,
+				       vport->info.qos, flags);
 
 	return 0;
 
@@ -1848,6 +1857,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  u16 vport, u16 vlan, u8 qos, u8 set_flags)
 {
 	struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport);
+	bool vst_mode_steering = esw_vst_mode_is_steering(esw);
 	int err = 0;
 
 	if (IS_ERR(evport))
@@ -1855,9 +1865,11 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 	if (vlan > 4095 || qos > 7)
 		return -EINVAL;
 
-	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
-	if (err)
-		return err;
+	if (esw->mode == MLX5_ESWITCH_OFFLOADS || !vst_mode_steering) {
+		err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags);
+		if (err)
+			return err;
+	}
 
 	evport->info.vlan = vlan;
 	evport->info.qos = qos;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 5a85a5d32be7..92644fbb5081 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -527,6 +527,12 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  u16 vport, u16 vlan, u8 qos, u8 set_flags);
 
+static inline bool esw_vst_mode_is_steering(struct mlx5_eswitch *esw)
+{
+	return (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, pop_vlan) &&
+		MLX5_CAP_ESW_INGRESS_ACL(esw->dev, push_vlan));
+}
+
 static inline bool mlx5_eswitch_vlan_actions_supported(struct mlx5_core_dev *dev,
 						       u8 vlan_depth)
 {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 5fe5d198b57a..29d4b201c7b2 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1090,6 +1090,11 @@ enum {
 	MLX5_VPORT_ADMIN_STATE_AUTO  = 0x2,
 };
 
+enum {
+	MLX5_VPORT_CVLAN_INSERT_WHEN_NO_CVLAN  = 0x1,
+	MLX5_VPORT_CVLAN_INSERT_ALWAYS         = 0x3,
+};
+
 enum {
 	MLX5_L3_PROT_TYPE_IPV4		= 0,
 	MLX5_L3_PROT_TYPE_IPV6		= 1,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index f3d1c62c98dd..a9ee7bc59c90 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -913,7 +913,8 @@ struct mlx5_ifc_e_switch_cap_bits {
 	u8         vport_svlan_insert[0x1];
 	u8         vport_cvlan_insert_if_not_exist[0x1];
 	u8         vport_cvlan_insert_overwrite[0x1];
-	u8         reserved_at_5[0x2];
+	u8         reserved_at_5[0x1];
+	u8         vport_cvlan_insert_always[0x1];
 	u8         esw_shared_ingress_acl[0x1];
 	u8         esw_uplink_ingress_acl[0x1];
 	u8         root_ft_on_other_esw[0x1];
-- 
cgit v1.3.1


From 936a192f974018b4f6040f6f77b1cc1e75bd8666 Mon Sep 17 00:00:00 2001
From: Kuniyuki Iwashima <kuniyu@amazon.com>
Date: Mon, 26 Dec 2022 22:27:52 +0900
Subject: tcp: Add TIME_WAIT sockets in bhash2.

Jiri Slaby reported regression of bind() with a simple repro. [0]

The repro creates a TIME_WAIT socket and tries to bind() a new socket
with the same local address and port.  Before commit 28044fc1d495 ("net:
Add a bhash2 table hashed by port and address"), the bind() failed with
-EADDRINUSE, but now it succeeds.

The cited commit should have put TIME_WAIT sockets into bhash2; otherwise,
inet_bhash2_conflict() misses TIME_WAIT sockets when validating bind()
requests if the address is not a wildcard one.

The straight option is to move sk_bind2_node from struct sock to struct
sock_common to add twsk to bhash2 as implemented as RFC. [1]  However, the
binary layout change in the struct sock could affect performances moving
hot fields on different cachelines.

To avoid that, we add another TIME_WAIT list in inet_bind2_bucket and check
it while validating bind().

[0]: https://lore.kernel.org/netdev/6b971a4e-c7d8-411e-1f92-fda29b5b2fb9@kernel.org/
[1]: https://lore.kernel.org/netdev/20221221151258.25748-2-kuniyu@amazon.com/

Fixes: 28044fc1d495 ("net: Add a bhash2 table hashed by port and address")
Reported-by: Jiri Slaby <jirislaby@kernel.org>
Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
Acked-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_hashtables.h    |  4 ++++
 include/net/inet_timewait_sock.h |  5 +++++
 net/ipv4/inet_connection_sock.c  | 26 ++++++++++++++++++++++----
 net/ipv4/inet_hashtables.c       |  8 +++++---
 net/ipv4/inet_timewait_sock.c    | 31 +++++++++++++++++++++++++++++--
 5 files changed, 65 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 69174093078f..99bd823e97f6 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -108,6 +108,10 @@ struct inet_bind2_bucket {
 	struct hlist_node	node;
 	/* List of sockets hashed to this bucket */
 	struct hlist_head	owners;
+	/* bhash has twsk in owners, but bhash2 has twsk in
+	 * deathrow not to add a member in struct sock_common.
+	 */
+	struct hlist_head	deathrow;
 };
 
 static inline struct net *ib_net(const struct inet_bind_bucket *ib)
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 5b47545f22d3..4a8e578405cb 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -73,9 +73,14 @@ struct inet_timewait_sock {
 	u32			tw_priority;
 	struct timer_list	tw_timer;
 	struct inet_bind_bucket	*tw_tb;
+	struct inet_bind2_bucket	*tw_tb2;
+	struct hlist_node		tw_bind2_node;
 };
 #define tw_tclass tw_tos
 
+#define twsk_for_each_bound_bhash2(__tw, list) \
+	hlist_for_each_entry(__tw, list, tw_bind2_node)
+
 static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk)
 {
 	return (struct inet_timewait_sock *)sk;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b366ab9148f2..848ffc3e0239 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -173,22 +173,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
 	return false;
 }
 
+static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
+				   kuid_t sk_uid, bool relax,
+				   bool reuseport_cb_ok, bool reuseport_ok)
+{
+	if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+		return false;
+
+	return inet_bind_conflict(sk, sk2, sk_uid, relax,
+				  reuseport_cb_ok, reuseport_ok);
+}
+
 static bool inet_bhash2_conflict(const struct sock *sk,
 				 const struct inet_bind2_bucket *tb2,
 				 kuid_t sk_uid,
 				 bool relax, bool reuseport_cb_ok,
 				 bool reuseport_ok)
 {
+	struct inet_timewait_sock *tw2;
 	struct sock *sk2;
 
 	sk_for_each_bound_bhash2(sk2, &tb2->owners) {
-		if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
-			continue;
+		if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+					   reuseport_cb_ok, reuseport_ok))
+			return true;
+	}
 
-		if (inet_bind_conflict(sk, sk2, sk_uid, relax,
-				       reuseport_cb_ok, reuseport_ok))
+	twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
+		sk2 = (struct sock *)tw2;
+
+		if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+					   reuseport_cb_ok, reuseport_ok))
 			return true;
 	}
+
 	return false;
 }
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d039b4e732a3..24a38b56fab9 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
 #endif
 		tb->rcv_saddr = sk->sk_rcv_saddr;
 	INIT_HLIST_HEAD(&tb->owners);
+	INIT_HLIST_HEAD(&tb->deathrow);
 	hlist_add_head(&tb->node, &head->chain);
 }
 
@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
 /* Caller must hold hashbucket lock for this tb with local BH disabled */
 void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
 {
-	if (hlist_empty(&tb->owners)) {
+	if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
 		__hlist_del(&tb->node);
 		kmem_cache_free(cachep, tb);
 	}
@@ -1103,15 +1104,16 @@ ok:
 	/* Head lock still held and bh's disabled */
 	inet_bind_hash(sk, tb, tb2, port);
 
-	spin_unlock(&head2->lock);
-
 	if (sk_unhashed(sk)) {
 		inet_sk(sk)->inet_sport = htons(port);
 		inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
 	}
 	if (tw)
 		inet_twsk_bind_unhash(tw, hinfo);
+
+	spin_unlock(&head2->lock);
 	spin_unlock(&head->lock);
+
 	if (tw)
 		inet_twsk_deschedule_put(tw);
 	local_bh_enable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 66fc940f9521..1d77d992e6e7 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -29,6 +29,7 @@
 void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
 			  struct inet_hashinfo *hashinfo)
 {
+	struct inet_bind2_bucket *tb2 = tw->tw_tb2;
 	struct inet_bind_bucket *tb = tw->tw_tb;
 
 	if (!tb)
@@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
 	__hlist_del(&tw->tw_bind_node);
 	tw->tw_tb = NULL;
 	inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+	__hlist_del(&tw->tw_bind2_node);
+	tw->tw_tb2 = NULL;
+	inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+
 	__sock_put((struct sock *)tw);
 }
 
@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 {
 	struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
 	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
-	struct inet_bind_hashbucket *bhead;
+	struct inet_bind_hashbucket *bhead, *bhead2;
 
 	spin_lock(lock);
 	sk_nulls_del_node_init_rcu((struct sock *)tw);
@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
 	/* Disassociate with bind bucket. */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
 			hashinfo->bhash_size)];
+	bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
+				       twsk_net(tw), tw->tw_num);
 
 	spin_lock(&bhead->lock);
+	spin_lock(&bhead2->lock);
 	inet_twsk_bind_unhash(tw, hashinfo);
+	spin_unlock(&bhead2->lock);
 	spin_unlock(&bhead->lock);
 
 	refcount_dec(&tw->tw_dr->tw_refcount);
@@ -93,6 +103,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
 	hlist_add_head(&tw->tw_bind_node, list);
 }
 
+static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
+				     struct hlist_head *list)
+{
+	hlist_add_head(&tw->tw_bind2_node, list);
+}
+
 /*
  * Enter the time wait state. This is called with locally disabled BH.
  * Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -105,17 +121,28 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
 	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
-	struct inet_bind_hashbucket *bhead;
+	struct inet_bind_hashbucket *bhead, *bhead2;
+
 	/* Step 1: Put TW into bind hash. Original socket stays there too.
 	   Note, that any socket with inet->num != 0 MUST be bound in
 	   binding cache, even if it is closed.
 	 */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
 			hashinfo->bhash_size)];
+	bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+
 	spin_lock(&bhead->lock);
+	spin_lock(&bhead2->lock);
+
 	tw->tw_tb = icsk->icsk_bind_hash;
 	WARN_ON(!icsk->icsk_bind_hash);
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+
+	tw->tw_tb2 = icsk->icsk_bind2_hash;
+	WARN_ON(!icsk->icsk_bind2_hash);
+	inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+
+	spin_unlock(&bhead2->lock);
 	spin_unlock(&bhead->lock);
 
 	spin_lock(lock);
-- 
cgit v1.3.1


From 99cb0d917ffa1ab628bb67364ca9b162c07699b1 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <masahiroy@kernel.org>
Date: Tue, 27 Dec 2022 03:45:37 +0900
Subject: arch: fix broken BuildID for arm64 and riscv

Dennis Gilmore reports that the BuildID is missing in the arm64 vmlinux
since commit 994b7ac1697b ("arm64: remove special treatment for the
link order of head.o").

The issue is that the type of .notes section, which contains the BuildID,
changed from NOTES to PROGBITS.

Ard Biesheuvel figured out that whichever object gets linked first gets
to decide the type of a section. The PROGBITS type is the result of the
compiler emitting .note.GNU-stack as PROGBITS rather than NOTE.

While Ard provided a fix for arm64, I want to fix this globally because
the same issue is happening on riscv since commit 2348e6bf4421 ("riscv:
remove special treatment for the link order of head.o"). This problem
will happen in general for other architectures if they start to drop
unneeded entries from scripts/head-object-list.txt.

Discard .note.GNU-stack in include/asm-generic/vmlinux.lds.h.

Link: https://lore.kernel.org/lkml/CAABkxwuQoz1CTbyb57n0ZX65eSYiTonFCU8-LCQc=74D=xE=rA@mail.gmail.com/
Fixes: 994b7ac1697b ("arm64: remove special treatment for the link order of head.o")
Fixes: 2348e6bf4421 ("riscv: remove special treatment for the link order of head.o")
Reported-by: Dennis Gilmore <dennis@ausil.us>
Suggested-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Acked-by: Palmer Dabbelt <palmer@rivosinc.com>
---
 include/asm-generic/vmlinux.lds.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'include')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index a94219e9916f..659bf3b31c91 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -891,7 +891,12 @@
 #define PRINTK_INDEX
 #endif
 
+/*
+ * Discard .note.GNU-stack, which is emitted as PROGBITS by the compiler.
+ * Otherwise, the type of .notes section would become PROGBITS instead of NOTES.
+ */
 #define NOTES								\
+	/DISCARD/ : { *(.note.GNU-stack) }				\
 	.notes : AT(ADDR(.notes) - LOAD_OFFSET) {			\
 		BOUNDED_SECTION_BY(.note.*, _notes)			\
 	} NOTES_HEADERS							\
-- 
cgit v1.3.1


From d9dba91be71f03cc75bcf39fc0d5d99ff33f1ae0 Mon Sep 17 00:00:00 2001
From: Christian Marangi <ansuelsmth@gmail.com>
Date: Thu, 29 Dec 2022 17:33:33 +0100
Subject: net: dsa: tag_qca: fix wrong MGMT_DATA2 size

It was discovered that MGMT_DATA2 can contain up to 28 bytes of data
instead of the 12 bytes written in the Documentation by accounting the
limit of 16 bytes declared in Documentation subtracting the first 4 byte
in the packet header.

Update the define with the real world value.

Tested-by: Ronald Wahl <ronald.wahl@raritan.com>
Fixes: c2ee8181fddb ("net: dsa: tag_qca: add define for handling mgmt Ethernet packet")
Signed-off-by: Christian Marangi <ansuelsmth@gmail.com>
Cc: stable@vger.kernel.org # v5.18+
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/tag_qca.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/dsa/tag_qca.h b/include/linux/dsa/tag_qca.h
index b1b5720d89a5..ee657452f122 100644
--- a/include/linux/dsa/tag_qca.h
+++ b/include/linux/dsa/tag_qca.h
@@ -45,8 +45,8 @@ struct sk_buff;
 					QCA_HDR_MGMT_COMMAND_LEN + \
 					QCA_HDR_MGMT_DATA1_LEN)
 
-#define QCA_HDR_MGMT_DATA2_LEN		12 /* Other 12 byte for the mdio data */
-#define QCA_HDR_MGMT_PADDING_LEN	34 /* Padding to reach the min Ethernet packet */
+#define QCA_HDR_MGMT_DATA2_LEN		28 /* Other 28 byte for the mdio data */
+#define QCA_HDR_MGMT_PADDING_LEN	18 /* Padding to reach the min Ethernet packet */
 
 #define QCA_HDR_MGMT_PKT_LEN		(QCA_HDR_MGMT_HEADER_LEN + \
 					QCA_HDR_LEN + \
-- 
cgit v1.3.1


From 6d4cfcf97986cc67635630a2bc1f8d5c92ecdbba Mon Sep 17 00:00:00 2001
From: Sean Anderson <sean.anderson@seco.com>
Date: Thu, 29 Dec 2022 15:21:20 -0500
Subject: net: phy: Update documentation for get_rate_matching

Now that phylink no longer calls phy_get_rate_matching with
PHY_INTERFACE_MODE_NA, phys no longer need to support it. Remove the
documentation mandating support.

Fixes: 7642cc28fd37 ("net: phylink: fix PHY validation with rate adaption")
Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phy.h | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/phy.h b/include/linux/phy.h
index 71eeb4e3b1fd..6378c997ded5 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -826,10 +826,7 @@ struct phy_driver {
 	 * whether to advertise lower-speed modes for that interface. It is
 	 * assumed that if a rate matching mode is supported on an interface,
 	 * then that interface's rate can be adapted to all slower link speeds
-	 * supported by the phy. If iface is %PHY_INTERFACE_MODE_NA, and the phy
-	 * supports any kind of rate matching for any interface, then it must
-	 * return that rate matching mode (preferring %RATE_MATCH_PAUSE to
-	 * %RATE_MATCH_CRS). If the interface is not supported, this should
+	 * supported by the phy. If the interface is not supported, this should
 	 * return %RATE_MATCH_NONE.
 	 */
 	int (*get_rate_matching)(struct phy_device *phydev,
-- 
cgit v1.3.1


From d19ab1f785d0b6b9f709799f0938658903821ba1 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 20 Dec 2022 15:13:34 +0100
Subject: mtd: cfi: allow building spi-intel standalone

When MTD or MTD_CFI_GEOMETRY is disabled, the spi-intel driver
fails to build, as it includes the shared CFI header:

include/linux/mtd/cfi.h:62:2: error: #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work. [-Werror=cpp]
   62 | #warning No CONFIG_MTD_CFI_Ix selected. No NOR chip support can work.

linux/mtd/spi-nor.h does not actually need to include cfi.h, so
remove the inclusion here to fix the warning. This uncovers a
missing #include in spi-nor/core.c so add that there to
prevent a different build issue.

Fixes: e23e5a05d1fd ("mtd: spi-nor: intel-spi: Convert to SPI MEM")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Reviewed-by: Tokunori Ikegami <ikegami.t@gmail.com>
Acked-by: Pratyush Yadav <pratyush@kernel.org>
Reviewed-by: Tudor Ambarus <tudor.ambarus@linaro.org>
Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
Link: https://lore.kernel.org/linux-mtd/20221220141352.1486360-1-arnd@kernel.org
---
 drivers/mtd/spi-nor/core.c  | 1 +
 include/linux/mtd/spi-nor.h | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c
index d8703d7dfd0a..d67c926bca8b 100644
--- a/drivers/mtd/spi-nor/core.c
+++ b/drivers/mtd/spi-nor/core.c
@@ -10,6 +10,7 @@
 #include <linux/err.h>
 #include <linux/errno.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/math64.h>
diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h
index 25765556223a..a3f8cdca90c8 100644
--- a/include/linux/mtd/spi-nor.h
+++ b/include/linux/mtd/spi-nor.h
@@ -7,7 +7,6 @@
 #define __LINUX_MTD_SPI_NOR_H
 
 #include <linux/bitops.h>
-#include <linux/mtd/cfi.h>
 #include <linux/mtd/mtd.h>
 #include <linux/spi/spi-mem.h>
 
-- 
cgit v1.3.1


From 8e1858710d9a71d88acd922f2e95d1eddb90eea0 Mon Sep 17 00:00:00 2001
From: Xiubo Li <xiubli@redhat.com>
Date: Thu, 17 Nov 2022 10:57:53 +0800
Subject: ceph: avoid use-after-free in ceph_fl_release_lock()

When ceph releasing the file_lock it will try to get the inode pointer
from the fl->fl_file, which the memory could already be released by
another thread in filp_close(). Because in VFS layer the fl->fl_file
doesn't increase the file's reference counter.

Will switch to use ceph dedicate lock info to track the inode.

And in ceph_fl_release_lock() we should skip all the operations if the
fl->fl_u.ceph.inode is not set, which should come from the request
file_lock. And we will set fl->fl_u.ceph.inode when inserting it to the
inode lock list, which is when copying the lock.

Link: https://tracker.ceph.com/issues/57986
Signed-off-by: Xiubo Li <xiubli@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Ilya Dryomov <idryomov@gmail.com>
Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
---
 fs/ceph/locks.c    | 20 ++++++++++++++++++--
 include/linux/fs.h |  3 +++
 2 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 6b3b8c299c17..9c8dc8a55e7e 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -34,18 +34,34 @@ static void ceph_fl_copy_lock(struct file_lock *dst, struct file_lock *src)
 {
 	struct inode *inode = file_inode(dst->fl_file);
 	atomic_inc(&ceph_inode(inode)->i_filelock_ref);
+	dst->fl_u.ceph.inode = igrab(inode);
 }
 
+/*
+ * Do not use the 'fl->fl_file' in release function, which
+ * is possibly already released by another thread.
+ */
 static void ceph_fl_release_lock(struct file_lock *fl)
 {
-	struct inode *inode = file_inode(fl->fl_file);
-	struct ceph_inode_info *ci = ceph_inode(inode);
+	struct inode *inode = fl->fl_u.ceph.inode;
+	struct ceph_inode_info *ci;
+
+	/*
+	 * If inode is NULL it should be a request file_lock,
+	 * nothing we can do.
+	 */
+	if (!inode)
+		return;
+
+	ci = ceph_inode(inode);
 	if (atomic_dec_and_test(&ci->i_filelock_ref)) {
 		/* clear error when all locks are released */
 		spin_lock(&ci->i_ceph_lock);
 		ci->i_ceph_flags &= ~CEPH_I_ERROR_FILELOCK;
 		spin_unlock(&ci->i_ceph_lock);
 	}
+	fl->fl_u.ceph.inode = NULL;
+	iput(inode);
 }
 
 static const struct file_lock_operations ceph_fl_lock_ops = {
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 066555ad1bf8..c1769a2c5d70 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1119,6 +1119,9 @@ struct file_lock {
 			int state;		/* state of grant or error if -ve */
 			unsigned int	debug_id;
 		} afs;
+		struct {
+			struct inode *inode;
+		} ceph;
 	} fl_u;
 } __randomize_layout;
 
-- 
cgit v1.3.1


From 5e29dc36bd5e2166b834ceb19990d9e68a734d7d Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@netfilter.org>
Date: Fri, 30 Dec 2022 13:24:38 +0100
Subject: netfilter: ipset: Rework long task execution when adding/deleting
 entries

When adding/deleting large number of elements in one step in ipset, it can
take a reasonable amount of time and can result in soft lockup errors. The
patch 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of
consecutive elements to add/delete") tried to fix it by limiting the max
elements to process at all. However it was not enough, it is still possible
that we get hung tasks. Lowering the limit is not reasonable, so the
approach in this patch is as follows: rely on the method used at resizing
sets and save the state when we reach a smaller internal batch limit,
unlock/lock and proceed from the saved state. Thus we can avoid long
continuous tasks and at the same time removed the limit to add/delete large
number of elements in one step.

The nfnl mutex is held during the whole operation which prevents one to
issue other ipset commands in parallel.

Fixes: 5f7b51bf09ba ("netfilter: ipset: Limit the maximal range of consecutive elements to add/delete")
Reported-by: syzbot+9204e7399656300bf271@syzkaller.appspotmail.com
Signed-off-by: Jozsef Kadlecsik <kadlec@netfilter.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/ipset/ip_set.h      |  2 +-
 net/netfilter/ipset/ip_set_core.c           |  7 ++++---
 net/netfilter/ipset/ip_set_hash_ip.c        | 14 +++++++-------
 net/netfilter/ipset/ip_set_hash_ipmark.c    | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipport.c    | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipportip.c  | 13 +++++++------
 net/netfilter/ipset/ip_set_hash_ipportnet.c | 13 ++++++++-----
 net/netfilter/ipset/ip_set_hash_net.c       | 17 +++++++----------
 net/netfilter/ipset/ip_set_hash_netiface.c  | 15 ++++++---------
 net/netfilter/ipset/ip_set_hash_netnet.c    | 23 +++++++----------------
 net/netfilter/ipset/ip_set_hash_netport.c   | 19 +++++++------------
 11 files changed, 68 insertions(+), 81 deletions(-)

(limited to 'include')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index ab934ad951a8..e8c350a3ade1 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -197,7 +197,7 @@ struct ip_set_region {
 };
 
 /* Max range where every element is added/deleted in one step */
-#define IPSET_MAX_RANGE		(1<<20)
+#define IPSET_MAX_RANGE		(1<<14)
 
 /* The max revision number supported by any set type + 1 */
 #define IPSET_REVISION_MAX	9
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
 		ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
 		ip_set_unlock(set);
 		retried = true;
-	} while (ret == -EAGAIN &&
-		 set->variant->resize &&
-		 (ret = set->variant->resize(set, retried)) == 0);
+	} while (ret == -ERANGE ||
+		 (ret == -EAGAIN &&
+		  set->variant->resize &&
+		  (ret = set->variant->resize(set, retried)) == 0));
 
 	if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
 		return 0;
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
 hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 	      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ip4 *h = set->data;
+	struct hash_ip4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ip4_elem e = { 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, hosts;
+	u32 ip = 0, ip_to = 0, hosts, i = 0;
 	int ret = 0;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
 
-	/* 64bit division is not allowed on 32bit */
-	if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; ip <= ip_to;) {
+	for (; ip <= ip_to; i++) {
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_ip4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
 hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipmark4 *h = set->data;
+	struct hash_ipmark4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipmark4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0;
+	u32 ip, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ip_set_mask_from_to(ip, ip_to, cidr);
 	}
 
-	if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
-	for (; ip <= ip_to; ip++) {
+	for (; ip <= ip_to; ip++, i++) {
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_ipmark4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ret = adtfn(set, &e, &ext, &ext, flags);
 
 		if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
 hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipport4 *h = set->data;
+	struct hash_ipport4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipport4_elem e = { .ip = 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
 	bool with_ports = false;
 	int ret;
 
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.ip = htonl(ip);
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_ipport4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
 hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipportip4 *h = set->data;
+	struct hash_ipportip4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportip4_elem e = { .ip = 0 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip, ip_to = 0, p = 0, port, port_to;
+	u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
 	bool with_ports = false;
 	int ret;
 
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	if (retried)
 		ip = ntohl(h->next.ip);
 	for (; ip <= ip_to; ip++) {
 		p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
 						       : port;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.ip = htonl(ip);
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_ipportip4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
 hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_ipportnet4 *h = set->data;
+	struct hash_ipportnet4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0, p = 0, port, port_to;
-	u32 ip2_from = 0, ip2_to = 0, ip2;
+	u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(port, port_to);
 	}
 
-	if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
-
 	ip2_to = ip2_from;
 	if (tb[IPSET_ATTR_IP2_TO]) {
 		ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		for (; p <= port_to; p++) {
 			e.port = htons(p);
 			do {
+				i++;
 				e.ip2 = htonl(ip2);
 				ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
 				e.cidr = cidr - 1;
+				if (i > IPSET_MAX_RANGE) {
+					hash_ipportnet4_data_next(&h->next,
+								  &e);
+					return -ERANGE;
+				}
 				ret = adtfn(set, &e, &ext, &ext, flags);
 
 				if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
 hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_net4 *h = set->data;
+	struct hash_net4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_net4_elem e = { .cidr = HOST_MASK };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, ipn, n = 0;
+	u32 ip = 0, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	do {
+		i++;
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_net4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 ip = 0, ip_to = 0, ipn, n = 0;
+	u32 ip = 0, ip_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried)
 		ip = ntohl(h->next.ip);
 	do {
+		i++;
 		e.ip = htonl(ip);
+		if (i > IPSET_MAX_RANGE) {
+			hash_netiface4_data_next(&h->next, &e);
+			return -ERANGE;
+		}
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
 		ret = adtfn(set, &e, &ext, &ext, flags);
 
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
 hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_netnet4 *h = set->data;
+	struct hash_netnet4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netnet4_elem e = { };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
 	u32 ip = 0, ip_to = 0;
-	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
-	u64 n = 0, m = 0;
+	u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
 	int ret;
 
 	if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
-		n++;
-	} while (ipn++ < ip_to);
-	ipn = ip2_from;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
-		m++;
-	} while (ipn++ < ip2_to);
-
-	if (n*m > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried) {
 		ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.ip[0] = htonl(ip);
 		ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
 		do {
+			i++;
 			e.ip[1] = htonl(ip2);
+			if (i > IPSET_MAX_RANGE) {
+				hash_netnet4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
 hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
-	const struct hash_netport4 *h = set->data;
+	struct hash_netport4 *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
 	struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
-	u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
-	u64 n = 0;
+	u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
 	bool with_ports = false;
 	u8 cidr;
 	int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
 	}
-	ipn = ip;
-	do {
-		ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
-		n++;
-	} while (ipn++ < ip_to);
-
-	if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
-		return -ERANGE;
 
 	if (retried) {
 		ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 		e.ip = htonl(ip);
 		ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
 		e.cidr = cidr - 1;
-		for (; p <= port_to; p++) {
+		for (; p <= port_to; p++, i++) {
 			e.port = htons(p);
+			if (i > IPSET_MAX_RANGE) {
+				hash_netport4_data_next(&h->next, &e);
+				return -ERANGE;
+			}
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-- 
cgit v1.3.1


From 59b745bb4e0bd445366c45b8df6b51b69134f4f5 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 13:49:54 -0700
Subject: io_uring: move 'poll_multi_queue' bool in io_ring_ctx

The cacheline section holding this variable has two gaps, where one is
caused by this bool not packing well with structs. This causes it to
blow into the next cacheline. Move the variable, shrinking io_ring_ctx
by a full cacheline in size.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/io_uring_types.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index dcd8a563ab52..128a67a40065 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -292,6 +292,8 @@ struct io_ring_ctx {
 	struct {
 		spinlock_t		completion_lock;
 
+		bool			poll_multi_queue;
+
 		/*
 		 * ->iopoll_list is protected by the ctx->uring_lock for
 		 * io_uring instances that don't use IORING_SETUP_SQPOLL.
@@ -300,7 +302,6 @@ struct io_ring_ctx {
 		 */
 		struct io_wq_work_list	iopoll_list;
 		struct io_hash_table	cancel_table;
-		bool			poll_multi_queue;
 
 		struct llist_head	work_llist;
 
-- 
cgit v1.3.1


From ee4b4e2248565babfba807d82c0f3e00c392a4c0 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:43:27 -0700
Subject: Revert "block: bio_copy_data_iter"

This reverts commit db1c7d77976775483a8ef240b4c705f113e13ea1.

We're reinstating the pktcdvd driver, which needs this API.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/bio.c         | 37 ++++++++++++++++++++++---------------
 include/linux/bio.h |  2 ++
 2 files changed, 24 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/block/bio.c b/block/bio.c
index 5f96fcae3f75..ab59a491a883 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1401,6 +1401,27 @@ void __bio_advance(struct bio *bio, unsigned bytes)
 }
 EXPORT_SYMBOL(__bio_advance);
 
+void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			struct bio *src, struct bvec_iter *src_iter)
+{
+	while (src_iter->bi_size && dst_iter->bi_size) {
+		struct bio_vec src_bv = bio_iter_iovec(src, *src_iter);
+		struct bio_vec dst_bv = bio_iter_iovec(dst, *dst_iter);
+		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
+		void *src_buf = bvec_kmap_local(&src_bv);
+		void *dst_buf = bvec_kmap_local(&dst_bv);
+
+		memcpy(dst_buf, src_buf, bytes);
+
+		kunmap_local(dst_buf);
+		kunmap_local(src_buf);
+
+		bio_advance_iter_single(src, src_iter, bytes);
+		bio_advance_iter_single(dst, dst_iter, bytes);
+	}
+}
+EXPORT_SYMBOL(bio_copy_data_iter);
+
 /**
  * bio_copy_data - copy contents of data buffers from one bio to another
  * @src: source bio
@@ -1414,21 +1435,7 @@ void bio_copy_data(struct bio *dst, struct bio *src)
 	struct bvec_iter src_iter = src->bi_iter;
 	struct bvec_iter dst_iter = dst->bi_iter;
 
-	while (src_iter.bi_size && dst_iter.bi_size) {
-		struct bio_vec src_bv = bio_iter_iovec(src, src_iter);
-		struct bio_vec dst_bv = bio_iter_iovec(dst, dst_iter);
-		unsigned int bytes = min(src_bv.bv_len, dst_bv.bv_len);
-		void *src_buf = bvec_kmap_local(&src_bv);
-		void *dst_buf = bvec_kmap_local(&dst_bv);
-
-		memcpy(dst_buf, src_buf, bytes);
-
-		kunmap_local(dst_buf);
-		kunmap_local(src_buf);
-
-		bio_advance_iter_single(src, &src_iter, bytes);
-		bio_advance_iter_single(dst, &dst_iter, bytes);
-	}
+	bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
 }
 EXPORT_SYMBOL(bio_copy_data);
 
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 22078a28d7cb..c1da63f6c808 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -475,6 +475,8 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty);
 extern void bio_set_pages_dirty(struct bio *bio);
 extern void bio_check_pages_dirty(struct bio *bio);
 
+extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter,
+			       struct bio *src, struct bvec_iter *src_iter);
 extern void bio_copy_data(struct bio *dst, struct bio *src);
 extern void bio_free_pages(struct bio *bio);
 void guard_bio_eod(struct bio *bio);
-- 
cgit v1.3.1


From 050a4f341f35bf51db321c7f68700f9e0b1a7552 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:44:02 -0700
Subject: Revert "block: remove devnode callback from struct
 block_device_operations"

This reverts commit 85d6ce58e493ac8b7122e2fbe3f41b94d6ebdc11.

We're reinstating the pktcdvd driver, which needs this API.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/genhd.c          | 11 +++++++++++
 include/linux/blkdev.h |  1 +
 2 files changed, 12 insertions(+)

(limited to 'include')

diff --git a/block/genhd.c b/block/genhd.c
index 08f76135a637..14329dc278b2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1201,10 +1201,21 @@ struct class block_class = {
 	.dev_uevent	= block_uevent,
 };
 
+static char *block_devnode(struct device *dev, umode_t *mode,
+			   kuid_t *uid, kgid_t *gid)
+{
+	struct gendisk *disk = dev_to_disk(dev);
+
+	if (disk->fops->devnode)
+		return disk->fops->devnode(disk, mode);
+	return NULL;
+}
+
 const struct device_type disk_type = {
 	.name		= "disk",
 	.groups		= disk_attr_groups,
 	.release	= disk_release,
+	.devnode	= block_devnode,
 };
 
 #ifdef CONFIG_PROC_FS
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 301cf1cf4f2f..43d4e073b111 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1395,6 +1395,7 @@ struct block_device_operations {
 	void (*swap_slot_free_notify) (struct block_device *, unsigned long);
 	int (*report_zones)(struct gendisk *, sector_t sector,
 			unsigned int nr_zones, report_zones_cb cb, void *data);
+	char *(*devnode)(struct gendisk *disk, umode_t *mode);
 	/* returns the length of the identifier or a negative errno: */
 	int (*get_unique_id)(struct gendisk *disk, u8 id[16],
 			enum blk_unique_id id_type);
-- 
cgit v1.3.1


From 4b83e99ee7092df37a5cf292fde976ebc475ea63 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@kernel.dk>
Date: Wed, 4 Jan 2023 14:44:13 -0700
Subject: Revert "pktcdvd: remove driver."
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit f40eb99897af665f11858dd7b56edcb62c3f3c67.

There are apparently still users out there of this driver. While we'd
love to remove it to ease the maintenance burden, let's reinstate it
for now until better (userspace) solutions can be developed.

Link: https://lore.kernel.org/lkml/20230104190115.ceglfefco475ev6c@pali/
Reported-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 Documentation/ABI/testing/debugfs-pktcdvd     |   18 +
 Documentation/ABI/testing/sysfs-class-pktcdvd |   97 +
 MAINTAINERS                                   |    7 +
 drivers/block/Kconfig                         |   43 +
 drivers/block/Makefile                        |    1 +
 drivers/block/pktcdvd.c                       | 2944 +++++++++++++++++++++++++
 include/linux/pktcdvd.h                       |  197 ++
 include/uapi/linux/pktcdvd.h                  |  112 +
 8 files changed, 3419 insertions(+)
 create mode 100644 Documentation/ABI/testing/debugfs-pktcdvd
 create mode 100644 Documentation/ABI/testing/sysfs-class-pktcdvd
 create mode 100644 drivers/block/pktcdvd.c
 create mode 100644 include/linux/pktcdvd.h
 create mode 100644 include/uapi/linux/pktcdvd.h

(limited to 'include')

diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd
new file mode 100644
index 000000000000..f6f65a4faea0
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-pktcdvd
@@ -0,0 +1,18 @@
+What:           /sys/kernel/debug/pktcdvd/pktcdvd[0-7]
+Date:           Oct. 2006
+KernelVersion:  2.6.20
+Contact:        Thomas Maier <balagi@justmail.de>
+Description:
+
+The pktcdvd module (packet writing driver) creates
+these files in debugfs:
+
+/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/
+
+    ====            ====== ====================================
+    info            0444   Lots of driver statistics and infos.
+    ====            ====== ====================================
+
+Example::
+
+    cat /sys/kernel/debug/pktcdvd/pktcdvd0/info
diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd
new file mode 100644
index 000000000000..ba1ce626591d
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-class-pktcdvd
@@ -0,0 +1,97 @@
+sysfs interface
+---------------
+The pktcdvd module (packet writing driver) creates the following files in the
+sysfs: (<devid> is in the format major:minor)
+
+What:		/sys/class/pktcdvd/add
+What:		/sys/class/pktcdvd/remove
+What:		/sys/class/pktcdvd/device_map
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+
+		==========	==============================================
+		add		(WO) Write a block device id (major:minor) to
+				create a new pktcdvd device and map it to the
+				block device.
+
+		remove		(WO) Write the pktcdvd device id (major:minor)
+				to remove the pktcdvd device.
+
+		device_map	(RO) Shows the device mapping in format:
+				pktcdvd[0-7] <pktdevid> <blkdevid>
+		==========	==============================================
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/dev
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/uevent
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		dev:	(RO) Device id
+
+		uevent:	(WO) To send a uevent
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/stat/reset
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		packets_started:	(RO) Number of started packets.
+
+		packets_finished:	(RO) Number of finished packets.
+
+		kb_written:		(RO) kBytes written.
+
+		kb_read:		(RO) kBytes read.
+
+		kb_read_gather:		(RO) kBytes read to fill write packets.
+
+		reset:			(WO) Write any value to it to reset
+					pktcdvd device statistic values, like
+					bytes read/written.
+
+
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off
+What:		/sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on
+Date:		Oct. 2006
+KernelVersion:	2.6.20
+Contact:	Thomas Maier <balagi@justmail.de>
+Description:
+		==============	================================================
+		size		(RO) Contains the size of the bio write queue.
+
+		congestion_off	(RW) If bio write queue size is below this mark,
+				accept new bio requests from the block layer.
+
+		congestion_on	(RW) If bio write queue size is higher as this
+				mark, do no longer accept bio write requests
+				from the block layer and wait till the pktcdvd
+				device has processed enough bio's so that bio
+				write queue size is below congestion off mark.
+				A value of <= 0 disables congestion control.
+		==============	================================================
+
+
+Example:
+--------
+To use the pktcdvd sysfs interface directly, you can do::
+
+    # create a new pktcdvd device mapped to /dev/hdc
+    echo "22:0" >/sys/class/pktcdvd/add
+    cat /sys/class/pktcdvd/device_map
+    # assuming device pktcdvd0 was created, look at stat's
+    cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written
+    # print the device id of the mapped block device
+    fgrep pktcdvd0 /sys/class/pktcdvd/device_map
+    # remove device, using pktcdvd0 device id   253:0
+    echo "253:0" >/sys/class/pktcdvd/remove
diff --git a/MAINTAINERS b/MAINTAINERS
index d53b3a6cdc67..3ef137fea4f6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16520,6 +16520,13 @@ S:	Supported
 F:	Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml
 F:	drivers/input/keyboard/pinephone-keyboard.c
 
+PKTCDVD DRIVER
+M:	linux-block@vger.kernel.org
+S:	Orphan
+F:	drivers/block/pktcdvd.c
+F:	include/linux/pktcdvd.h
+F:	include/uapi/linux/pktcdvd.h
+
 PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER
 M:	Tomasz Duszynski <tduszyns@gmail.com>
 S:	Maintained
diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index a2184b428493..a41145d52de9 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE
 	  The default value is 4096 kilobytes. Only change this if you know
 	  what you are doing.
 
+config CDROM_PKTCDVD
+	tristate "Packet writing on CD/DVD media (DEPRECATED)"
+	depends on !UML
+	depends on SCSI
+	select CDROM
+	help
+	  Note: This driver is deprecated and will be removed from the
+	  kernel in the near future!
+
+	  If you have a CDROM/DVD drive that supports packet writing, say
+	  Y to include support. It should work with any MMC/Mt Fuji
+	  compliant ATAPI or SCSI drive, which is just about any newer
+	  DVD/CD writer.
+
+	  Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs
+	  is possible.
+	  DVD-RW disks must be in restricted overwrite mode.
+
+	  See the file <file:Documentation/cdrom/packet-writing.rst>
+	  for further information on the use of this driver.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called pktcdvd.
+
+config CDROM_PKTCDVD_BUFFERS
+	int "Free buffers for data gathering"
+	depends on CDROM_PKTCDVD
+	default "8"
+	help
+	  This controls the maximum number of active concurrent packets. More
+	  concurrent packets can increase write performance, but also require
+	  more memory. Each concurrent packet will require approximately 64Kb
+	  of non-swappable kernel memory, memory which will be allocated when
+	  a disc is opened for writing.
+
+config CDROM_PKTCDVD_WCACHE
+	bool "Enable write caching"
+	depends on CDROM_PKTCDVD
+	help
+	  If enabled, write caching will be set for the CD-R/W device. For now
+	  this option is dangerous unless the CD-RW media is known good, as we
+	  don't do deferred write error handling yet.
+
 config ATA_OVER_ETH
 	tristate "ATA over Ethernet support"
 	depends on NET
diff --git a/drivers/block/Makefile b/drivers/block/Makefile
index 962ee65d8ca3..101612cba303 100644
--- a/drivers/block/Makefile
+++ b/drivers/block/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM)	+= z2ram.o
 obj-$(CONFIG_N64CART)		+= n64cart.o
 obj-$(CONFIG_BLK_DEV_RAM)	+= brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)	+= loop.o
+obj-$(CONFIG_CDROM_PKTCDVD)	+= pktcdvd.o
 obj-$(CONFIG_SUNVDC)		+= sunvdc.o
 
 obj-$(CONFIG_BLK_DEV_NBD)	+= nbd.o
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
new file mode 100644
index 000000000000..4cea3b08087e
--- /dev/null
+++ b/drivers/block/pktcdvd.c
@@ -0,0 +1,2944 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ * Copyright (C) 2006 Thomas Maier <balagi@justmail.de>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and
+ * DVD-RAM devices.
+ *
+ * Theory of operation:
+ *
+ * At the lowest level, there is the standard driver for the CD/DVD device,
+ * such as drivers/scsi/sr.c. This driver can handle read and write requests,
+ * but it doesn't know anything about the special restrictions that apply to
+ * packet writing. One restriction is that write requests must be aligned to
+ * packet boundaries on the physical media, and the size of a write request
+ * must be equal to the packet size. Another restriction is that a
+ * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read
+ * command, if the previous command was a write.
+ *
+ * The purpose of the packet writing driver is to hide these restrictions from
+ * higher layers, such as file systems, and present a block device that can be
+ * randomly read and written using 2kB-sized blocks.
+ *
+ * The lowest layer in the packet writing driver is the packet I/O scheduler.
+ * Its data is defined by the struct packet_iosched and includes two bio
+ * queues with pending read and write requests. These queues are processed
+ * by the pkt_iosched_process_queue() function. The write requests in this
+ * queue are already properly aligned and sized. This layer is responsible for
+ * issuing the flush cache commands and scheduling the I/O in a good order.
+ *
+ * The next layer transforms unaligned write requests to aligned writes. This
+ * transformation requires reading missing pieces of data from the underlying
+ * block device, assembling the pieces to full packets and queuing them to the
+ * packet I/O scheduler.
+ *
+ * At the top layer there is a custom ->submit_bio function that forwards
+ * read requests directly to the iosched queue and puts write requests in the
+ * unaligned write queue. A kernel thread performs the necessary read
+ * gathering to convert the unaligned writes to aligned writes and then feeds
+ * them to the packet I/O scheduler.
+ *
+ *************************************************************************/
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/pktcdvd.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/compat.h>
+#include <linux/kthread.h>
+#include <linux/errno.h>
+#include <linux/spinlock.h>
+#include <linux/file.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/miscdevice.h>
+#include <linux/freezer.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/backing-dev.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_ioctl.h>
+#include <scsi/scsi.h>
+#include <linux/debugfs.h>
+#include <linux/device.h>
+#include <linux/nospec.h>
+#include <linux/uaccess.h>
+
+#define DRIVER_NAME	"pktcdvd"
+
+#define pkt_err(pd, fmt, ...)						\
+	pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_notice(pd, fmt, ...)					\
+	pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_info(pd, fmt, ...)						\
+	pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
+
+#define pkt_dbg(level, pd, fmt, ...)					\
+do {									\
+	if (level == 2 && PACKET_DEBUG >= 2)				\
+		pr_notice("%s: %s():" fmt,				\
+			  pd->name, __func__, ##__VA_ARGS__);		\
+	else if (level == 1 && PACKET_DEBUG >= 1)			\
+		pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__);		\
+} while (0)
+
+#define MAX_SPEED 0xffff
+
+static DEFINE_MUTEX(pktcdvd_mutex);
+static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
+static struct proc_dir_entry *pkt_proc;
+static int pktdev_major;
+static int write_congestion_on  = PKT_WRITE_CONGESTION_ON;
+static int write_congestion_off = PKT_WRITE_CONGESTION_OFF;
+static struct mutex ctl_mutex;	/* Serialize open/close/setup/teardown */
+static mempool_t psd_pool;
+static struct bio_set pkt_bio_set;
+
+static struct class	*class_pktcdvd = NULL;    /* /sys/class/pktcdvd */
+static struct dentry	*pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */
+
+/* forward declaration */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
+static int pkt_remove_dev(dev_t pkt_dev);
+static int pkt_seq_show(struct seq_file *m, void *p);
+
+static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
+{
+	return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
+}
+
+/**********************************************************
+ * sysfs interface for pktcdvd
+ * by (C) 2006  Thomas Maier <balagi@justmail.de>
+ 
+  /sys/class/pktcdvd/pktcdvd[0-7]/
+                     stat/reset
+                     stat/packets_started
+                     stat/packets_finished
+                     stat/kb_written
+                     stat/kb_read
+                     stat/kb_read_gather
+                     write_queue/size
+                     write_queue/congestion_off
+                     write_queue/congestion_on
+ **********************************************************/
+
+static ssize_t packets_started_show(struct device *dev,
+				    struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started);
+}
+static DEVICE_ATTR_RO(packets_started);
+
+static ssize_t packets_finished_show(struct device *dev,
+				     struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended);
+}
+static DEVICE_ATTR_RO(packets_finished);
+
+static ssize_t kb_written_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1);
+}
+static DEVICE_ATTR_RO(kb_written);
+
+static ssize_t kb_read_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1);
+}
+static DEVICE_ATTR_RO(kb_read);
+
+static ssize_t kb_read_gather_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1);
+}
+static DEVICE_ATTR_RO(kb_read_gather);
+
+static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
+			   const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+
+	if (len > 0) {
+		pd->stats.pkt_started = 0;
+		pd->stats.pkt_ended = 0;
+		pd->stats.secs_w = 0;
+		pd->stats.secs_rg = 0;
+		pd->stats.secs_r = 0;
+	}
+	return len;
+}
+static DEVICE_ATTR_WO(reset);
+
+static struct attribute *pkt_stat_attrs[] = {
+	&dev_attr_packets_finished.attr,
+	&dev_attr_packets_started.attr,
+	&dev_attr_kb_read.attr,
+	&dev_attr_kb_written.attr,
+	&dev_attr_kb_read_gather.attr,
+	&dev_attr_reset.attr,
+	NULL,
+};
+
+static const struct attribute_group pkt_stat_group = {
+	.name = "stat",
+	.attrs = pkt_stat_attrs,
+};
+
+static ssize_t size_show(struct device *dev,
+			 struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->bio_queue_size);
+	spin_unlock(&pd->lock);
+	return n;
+}
+static DEVICE_ATTR_RO(size);
+
+static void init_write_congestion_marks(int* lo, int* hi)
+{
+	if (*hi > 0) {
+		*hi = max(*hi, 500);
+		*hi = min(*hi, 1000000);
+		if (*lo <= 0)
+			*lo = *hi - 100;
+		else {
+			*lo = min(*lo, *hi - 100);
+			*lo = max(*lo, 100);
+		}
+	} else {
+		*hi = -1;
+		*lo = -1;
+	}
+}
+
+static ssize_t congestion_off_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->write_congestion_off);
+	spin_unlock(&pd->lock);
+	return n;
+}
+
+static ssize_t congestion_off_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_off = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
+	}
+	return len;
+}
+static DEVICE_ATTR_RW(congestion_off);
+
+static ssize_t congestion_on_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int n;
+
+	spin_lock(&pd->lock);
+	n = sysfs_emit(buf, "%d\n", pd->write_congestion_on);
+	spin_unlock(&pd->lock);
+	return n;
+}
+
+static ssize_t congestion_on_store(struct device *dev,
+				   struct device_attribute *attr,
+				   const char *buf, size_t len)
+{
+	struct pktcdvd_device *pd = dev_get_drvdata(dev);
+	int val;
+
+	if (sscanf(buf, "%d", &val) == 1) {
+		spin_lock(&pd->lock);
+		pd->write_congestion_on = val;
+		init_write_congestion_marks(&pd->write_congestion_off,
+					&pd->write_congestion_on);
+		spin_unlock(&pd->lock);
+	}
+	return len;
+}
+static DEVICE_ATTR_RW(congestion_on);
+
+static struct attribute *pkt_wq_attrs[] = {
+	&dev_attr_congestion_on.attr,
+	&dev_attr_congestion_off.attr,
+	&dev_attr_size.attr,
+	NULL,
+};
+
+static const struct attribute_group pkt_wq_group = {
+	.name = "write_queue",
+	.attrs = pkt_wq_attrs,
+};
+
+static const struct attribute_group *pkt_groups[] = {
+	&pkt_stat_group,
+	&pkt_wq_group,
+	NULL,
+};
+
+static void pkt_sysfs_dev_new(struct pktcdvd_device *pd)
+{
+	if (class_pktcdvd) {
+		pd->dev = device_create_with_groups(class_pktcdvd, NULL,
+						    MKDEV(0, 0), pd, pkt_groups,
+						    "%s", pd->name);
+		if (IS_ERR(pd->dev))
+			pd->dev = NULL;
+	}
+}
+
+static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd)
+{
+	if (class_pktcdvd)
+		device_unregister(pd->dev);
+}
+
+
+/********************************************************************
+  /sys/class/pktcdvd/
+                     add            map block device
+                     remove         unmap packet dev
+                     device_map     show mappings
+ *******************************************************************/
+
+static void class_pktcdvd_release(struct class *cls)
+{
+	kfree(cls);
+}
+
+static ssize_t device_map_show(struct class *c, struct class_attribute *attr,
+			       char *data)
+{
+	int n = 0;
+	int idx;
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+	for (idx = 0; idx < MAX_WRITERS; idx++) {
+		struct pktcdvd_device *pd = pkt_devs[idx];
+		if (!pd)
+			continue;
+		n += sprintf(data+n, "%s %u:%u %u:%u\n",
+			pd->name,
+			MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev),
+			MAJOR(pd->bdev->bd_dev),
+			MINOR(pd->bdev->bd_dev));
+	}
+	mutex_unlock(&ctl_mutex);
+	return n;
+}
+static CLASS_ATTR_RO(device_map);
+
+static ssize_t add_store(struct class *c, struct class_attribute *attr,
+			 const char *buf, size_t count)
+{
+	unsigned int major, minor;
+
+	if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+		/* pkt_setup_dev() expects caller to hold reference to self */
+		if (!try_module_get(THIS_MODULE))
+			return -ENODEV;
+
+		pkt_setup_dev(MKDEV(major, minor), NULL);
+
+		module_put(THIS_MODULE);
+
+		return count;
+	}
+
+	return -EINVAL;
+}
+static CLASS_ATTR_WO(add);
+
+static ssize_t remove_store(struct class *c, struct class_attribute *attr,
+			    const char *buf, size_t count)
+{
+	unsigned int major, minor;
+	if (sscanf(buf, "%u:%u", &major, &minor) == 2) {
+		pkt_remove_dev(MKDEV(major, minor));
+		return count;
+	}
+	return -EINVAL;
+}
+static CLASS_ATTR_WO(remove);
+
+static struct attribute *class_pktcdvd_attrs[] = {
+	&class_attr_add.attr,
+	&class_attr_remove.attr,
+	&class_attr_device_map.attr,
+	NULL,
+};
+ATTRIBUTE_GROUPS(class_pktcdvd);
+
+static int pkt_sysfs_init(void)
+{
+	int ret = 0;
+
+	/*
+	 * create control files in sysfs
+	 * /sys/class/pktcdvd/...
+	 */
+	class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL);
+	if (!class_pktcdvd)
+		return -ENOMEM;
+	class_pktcdvd->name = DRIVER_NAME;
+	class_pktcdvd->owner = THIS_MODULE;
+	class_pktcdvd->class_release = class_pktcdvd_release;
+	class_pktcdvd->class_groups = class_pktcdvd_groups;
+	ret = class_register(class_pktcdvd);
+	if (ret) {
+		kfree(class_pktcdvd);
+		class_pktcdvd = NULL;
+		pr_err("failed to create class pktcdvd\n");
+		return ret;
+	}
+	return 0;
+}
+
+static void pkt_sysfs_cleanup(void)
+{
+	if (class_pktcdvd)
+		class_destroy(class_pktcdvd);
+	class_pktcdvd = NULL;
+}
+
+/********************************************************************
+  entries in debugfs
+
+  /sys/kernel/debug/pktcdvd[0-7]/
+			info
+
+ *******************************************************************/
+
+static int pkt_debugfs_seq_show(struct seq_file *m, void *p)
+{
+	return pkt_seq_show(m, p);
+}
+
+static int pkt_debugfs_fops_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, pkt_debugfs_seq_show, inode->i_private);
+}
+
+static const struct file_operations debug_fops = {
+	.open		= pkt_debugfs_fops_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+	.owner		= THIS_MODULE,
+};
+
+static void pkt_debugfs_dev_new(struct pktcdvd_device *pd)
+{
+	if (!pkt_debugfs_root)
+		return;
+	pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root);
+	if (!pd->dfs_d_root)
+		return;
+
+	pd->dfs_f_info = debugfs_create_file("info", 0444,
+					     pd->dfs_d_root, pd, &debug_fops);
+}
+
+static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd)
+{
+	if (!pkt_debugfs_root)
+		return;
+	debugfs_remove(pd->dfs_f_info);
+	debugfs_remove(pd->dfs_d_root);
+	pd->dfs_f_info = NULL;
+	pd->dfs_d_root = NULL;
+}
+
+static void pkt_debugfs_init(void)
+{
+	pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL);
+}
+
+static void pkt_debugfs_cleanup(void)
+{
+	debugfs_remove(pkt_debugfs_root);
+	pkt_debugfs_root = NULL;
+}
+
+/* ----------------------------------------------------------*/
+
+
+static void pkt_bio_finished(struct pktcdvd_device *pd)
+{
+	BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
+	if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
+		pkt_dbg(2, pd, "queue empty\n");
+		atomic_set(&pd->iosched.attention, 1);
+		wake_up(&pd->wqueue);
+	}
+}
+
+/*
+ * Allocate a packet_data struct
+ */
+static struct packet_data *pkt_alloc_packet_data(int frames)
+{
+	int i;
+	struct packet_data *pkt;
+
+	pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL);
+	if (!pkt)
+		goto no_pkt;
+
+	pkt->frames = frames;
+	pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL);
+	if (!pkt->w_bio)
+		goto no_bio;
+
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++) {
+		pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
+		if (!pkt->pages[i])
+			goto no_page;
+	}
+
+	spin_lock_init(&pkt->lock);
+	bio_list_init(&pkt->orig_bios);
+
+	for (i = 0; i < frames; i++) {
+		pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL);
+		if (!pkt->r_bios[i])
+			goto no_rd_bio;
+	}
+
+	return pkt;
+
+no_rd_bio:
+	for (i = 0; i < frames; i++)
+		kfree(pkt->r_bios[i]);
+no_page:
+	for (i = 0; i < frames / FRAMES_PER_PAGE; i++)
+		if (pkt->pages[i])
+			__free_page(pkt->pages[i]);
+	kfree(pkt->w_bio);
+no_bio:
+	kfree(pkt);
+no_pkt:
+	return NULL;
+}
+
+/*
+ * Free a packet_data struct
+ */
+static void pkt_free_packet_data(struct packet_data *pkt)
+{
+	int i;
+
+	for (i = 0; i < pkt->frames; i++)
+		kfree(pkt->r_bios[i]);
+	for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++)
+		__free_page(pkt->pages[i]);
+	kfree(pkt->w_bio);
+	kfree(pkt);
+}
+
+static void pkt_shrink_pktlist(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *next;
+
+	BUG_ON(!list_empty(&pd->cdrw.pkt_active_list));
+
+	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) {
+		pkt_free_packet_data(pkt);
+	}
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+}
+
+static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets)
+{
+	struct packet_data *pkt;
+
+	BUG_ON(!list_empty(&pd->cdrw.pkt_free_list));
+
+	while (nr_packets > 0) {
+		pkt = pkt_alloc_packet_data(pd->settings.size >> 2);
+		if (!pkt) {
+			pkt_shrink_pktlist(pd);
+			return 0;
+		}
+		pkt->id = nr_packets;
+		pkt->pd = pd;
+		list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+		nr_packets--;
+	}
+	return 1;
+}
+
+static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node)
+{
+	struct rb_node *n = rb_next(&node->rb_node);
+	if (!n)
+		return NULL;
+	return rb_entry(n, struct pkt_rb_node, rb_node);
+}
+
+static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+	rb_erase(&node->rb_node, &pd->bio_queue);
+	mempool_free(node, &pd->rb_pool);
+	pd->bio_queue_size--;
+	BUG_ON(pd->bio_queue_size < 0);
+}
+
+/*
+ * Find the first node in the pd->bio_queue rb tree with a starting sector >= s.
+ */
+static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s)
+{
+	struct rb_node *n = pd->bio_queue.rb_node;
+	struct rb_node *next;
+	struct pkt_rb_node *tmp;
+
+	if (!n) {
+		BUG_ON(pd->bio_queue_size > 0);
+		return NULL;
+	}
+
+	for (;;) {
+		tmp = rb_entry(n, struct pkt_rb_node, rb_node);
+		if (s <= tmp->bio->bi_iter.bi_sector)
+			next = n->rb_left;
+		else
+			next = n->rb_right;
+		if (!next)
+			break;
+		n = next;
+	}
+
+	if (s > tmp->bio->bi_iter.bi_sector) {
+		tmp = pkt_rbtree_next(tmp);
+		if (!tmp)
+			return NULL;
+	}
+	BUG_ON(s > tmp->bio->bi_iter.bi_sector);
+	return tmp;
+}
+
+/*
+ * Insert a node into the pd->bio_queue rb tree.
+ */
+static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node)
+{
+	struct rb_node **p = &pd->bio_queue.rb_node;
+	struct rb_node *parent = NULL;
+	sector_t s = node->bio->bi_iter.bi_sector;
+	struct pkt_rb_node *tmp;
+
+	while (*p) {
+		parent = *p;
+		tmp = rb_entry(parent, struct pkt_rb_node, rb_node);
+		if (s < tmp->bio->bi_iter.bi_sector)
+			p = &(*p)->rb_left;
+		else
+			p = &(*p)->rb_right;
+	}
+	rb_link_node(&node->rb_node, parent, p);
+	rb_insert_color(&node->rb_node, &pd->bio_queue);
+	pd->bio_queue_size++;
+}
+
+/*
+ * Send a packet_command to the underlying block device and
+ * wait for completion.
+ */
+static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+	struct request_queue *q = bdev_get_queue(pd->bdev);
+	struct scsi_cmnd *scmd;
+	struct request *rq;
+	int ret = 0;
+
+	rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ?
+			     REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0);
+	if (IS_ERR(rq))
+		return PTR_ERR(rq);
+	scmd = blk_mq_rq_to_pdu(rq);
+
+	if (cgc->buflen) {
+		ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen,
+				      GFP_NOIO);
+		if (ret)
+			goto out;
+	}
+
+	scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]);
+	memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE);
+
+	rq->timeout = 60*HZ;
+	if (cgc->quiet)
+		rq->rq_flags |= RQF_QUIET;
+
+	blk_execute_rq(rq, false);
+	if (scmd->result)
+		ret = -EIO;
+out:
+	blk_mq_free_request(rq);
+	return ret;
+}
+
+static const char *sense_key_string(__u8 index)
+{
+	static const char * const info[] = {
+		"No sense", "Recovered error", "Not ready",
+		"Medium error", "Hardware error", "Illegal request",
+		"Unit attention", "Data protect", "Blank check",
+	};
+
+	return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
+}
+
+/*
+ * A generic sense dump / resolve mechanism should be implemented across
+ * all ATAPI + SCSI devices.
+ */
+static void pkt_dump_sense(struct pktcdvd_device *pd,
+			   struct packet_command *cgc)
+{
+	struct scsi_sense_hdr *sshdr = cgc->sshdr;
+
+	if (sshdr)
+		pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+			CDROM_PACKET_SIZE, cgc->cmd,
+			sshdr->sense_key, sshdr->asc, sshdr->ascq,
+			sense_key_string(sshdr->sense_key));
+	else
+		pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
+}
+
+/*
+ * flush the drive cache to media
+ */
+static int pkt_flush_cache(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_FLUSH_CACHE;
+	cgc.quiet = 1;
+
+	/*
+	 * the IMMED bit -- we default to not setting it, although that
+	 * would allow a much faster close, this is safer
+	 */
+#if 0
+	cgc.cmd[1] = 1 << 1;
+#endif
+	return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * speed is given as the normal factor, e.g. 4 for 4x
+ */
+static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
+				unsigned write_speed, unsigned read_speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	int ret;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_SET_SPEED;
+	cgc.cmd[2] = (read_speed >> 8) & 0xff;
+	cgc.cmd[3] = read_speed & 0xff;
+	cgc.cmd[4] = (write_speed >> 8) & 0xff;
+	cgc.cmd[5] = write_speed & 0xff;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		pkt_dump_sense(pd, &cgc);
+
+	return ret;
+}
+
+/*
+ * Queue a bio for processing by the low-level CD device. Must be called
+ * from process context.
+ */
+static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio)
+{
+	spin_lock(&pd->iosched.lock);
+	if (bio_data_dir(bio) == READ)
+		bio_list_add(&pd->iosched.read_queue, bio);
+	else
+		bio_list_add(&pd->iosched.write_queue, bio);
+	spin_unlock(&pd->iosched.lock);
+
+	atomic_set(&pd->iosched.attention, 1);
+	wake_up(&pd->wqueue);
+}
+
+/*
+ * Process the queued read/write requests. This function handles special
+ * requirements for CDRW drives:
+ * - A cache flush command must be inserted before a read request if the
+ *   previous request was a write.
+ * - Switching between reading and writing is slow, so don't do it more often
+ *   than necessary.
+ * - Optimize for throughput at the expense of latency. This means that streaming
+ *   writes will never be interrupted by a read, but if the drive has to seek
+ *   before the next write, switch to reading instead if there are any pending
+ *   read requests.
+ * - Set the read speed according to current usage pattern. When only reading
+ *   from the device, it's best to use the highest possible read speed, but
+ *   when switching often between reading and writing, it's better to have the
+ *   same read and write speeds.
+ */
+static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
+{
+
+	if (atomic_read(&pd->iosched.attention) == 0)
+		return;
+	atomic_set(&pd->iosched.attention, 0);
+
+	for (;;) {
+		struct bio *bio;
+		int reads_queued, writes_queued;
+
+		spin_lock(&pd->iosched.lock);
+		reads_queued = !bio_list_empty(&pd->iosched.read_queue);
+		writes_queued = !bio_list_empty(&pd->iosched.write_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!reads_queued && !writes_queued)
+			break;
+
+		if (pd->iosched.writing) {
+			int need_write_seek = 1;
+			spin_lock(&pd->iosched.lock);
+			bio = bio_list_peek(&pd->iosched.write_queue);
+			spin_unlock(&pd->iosched.lock);
+			if (bio && (bio->bi_iter.bi_sector ==
+				    pd->iosched.last_write))
+				need_write_seek = 0;
+			if (need_write_seek && reads_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "write, waiting\n");
+					break;
+				}
+				pkt_flush_cache(pd);
+				pd->iosched.writing = 0;
+			}
+		} else {
+			if (!reads_queued && writes_queued) {
+				if (atomic_read(&pd->cdrw.pending_bios) > 0) {
+					pkt_dbg(2, pd, "read, waiting\n");
+					break;
+				}
+				pd->iosched.writing = 1;
+			}
+		}
+
+		spin_lock(&pd->iosched.lock);
+		if (pd->iosched.writing)
+			bio = bio_list_pop(&pd->iosched.write_queue);
+		else
+			bio = bio_list_pop(&pd->iosched.read_queue);
+		spin_unlock(&pd->iosched.lock);
+
+		if (!bio)
+			continue;
+
+		if (bio_data_dir(bio) == READ)
+			pd->iosched.successive_reads +=
+				bio->bi_iter.bi_size >> 10;
+		else {
+			pd->iosched.successive_reads = 0;
+			pd->iosched.last_write = bio_end_sector(bio);
+		}
+		if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) {
+			if (pd->read_speed == pd->write_speed) {
+				pd->read_speed = MAX_SPEED;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		} else {
+			if (pd->read_speed != pd->write_speed) {
+				pd->read_speed = pd->write_speed;
+				pkt_set_speed(pd, pd->write_speed, pd->read_speed);
+			}
+		}
+
+		atomic_inc(&pd->cdrw.pending_bios);
+		submit_bio_noacct(bio);
+	}
+}
+
+/*
+ * Special care is needed if the underlying block device has a small
+ * max_phys_segments value.
+ */
+static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q)
+{
+	if ((pd->settings.size << 9) / CD_FRAMESIZE
+	    <= queue_max_segments(q)) {
+		/*
+		 * The cdrom device can handle one segment/frame
+		 */
+		clear_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else if ((pd->settings.size << 9) / PAGE_SIZE
+		   <= queue_max_segments(q)) {
+		/*
+		 * We can handle this case at the expense of some extra memory
+		 * copies during write operations
+		 */
+		set_bit(PACKET_MERGE_SEGS, &pd->flags);
+		return 0;
+	} else {
+		pkt_err(pd, "cdrom max_phys_segments too small\n");
+		return -EIO;
+	}
+}
+
+static void pkt_end_io_read(struct bio *bio)
+{
+	struct packet_data *pkt = bio->bi_private;
+	struct pktcdvd_device *pd = pkt->pd;
+	BUG_ON(!pd);
+
+	pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
+		bio, (unsigned long long)pkt->sector,
+		(unsigned long long)bio->bi_iter.bi_sector, bio->bi_status);
+
+	if (bio->bi_status)
+		atomic_inc(&pkt->io_errors);
+	bio_uninit(bio);
+	if (atomic_dec_and_test(&pkt->io_wait)) {
+		atomic_inc(&pkt->run_sm);
+		wake_up(&pd->wqueue);
+	}
+	pkt_bio_finished(pd);
+}
+
+static void pkt_end_io_packet_write(struct bio *bio)
+{
+	struct packet_data *pkt = bio->bi_private;
+	struct pktcdvd_device *pd = pkt->pd;
+	BUG_ON(!pd);
+
+	pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status);
+
+	pd->stats.pkt_ended++;
+
+	bio_uninit(bio);
+	pkt_bio_finished(pd);
+	atomic_dec(&pkt->io_wait);
+	atomic_inc(&pkt->run_sm);
+	wake_up(&pd->wqueue);
+}
+
+/*
+ * Schedule reads for the holes in a packet
+ */
+static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	int frames_read = 0;
+	struct bio *bio;
+	int f;
+	char written[PACKET_MAX_SIZE];
+
+	BUG_ON(bio_list_empty(&pkt->orig_bios));
+
+	atomic_set(&pkt->io_wait, 0);
+	atomic_set(&pkt->io_errors, 0);
+
+	/*
+	 * Figure out which frames we need to read before we can write.
+	 */
+	memset(written, 0, sizeof(written));
+	spin_lock(&pkt->lock);
+	bio_list_for_each(bio, &pkt->orig_bios) {
+		int first_frame = (bio->bi_iter.bi_sector - pkt->sector) /
+			(CD_FRAMESIZE >> 9);
+		int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE;
+		pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9);
+		BUG_ON(first_frame < 0);
+		BUG_ON(first_frame + num_frames > pkt->frames);
+		for (f = first_frame; f < first_frame + num_frames; f++)
+			written[f] = 1;
+	}
+	spin_unlock(&pkt->lock);
+
+	if (pkt->cache_valid) {
+		pkt_dbg(2, pd, "zone %llx cached\n",
+			(unsigned long long)pkt->sector);
+		goto out_account;
+	}
+
+	/*
+	 * Schedule reads for missing parts of the packet.
+	 */
+	for (f = 0; f < pkt->frames; f++) {
+		int p, offset;
+
+		if (written[f])
+			continue;
+
+		bio = pkt->r_bios[f];
+		bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ);
+		bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9);
+		bio->bi_end_io = pkt_end_io_read;
+		bio->bi_private = pkt;
+
+		p = (f * CD_FRAMESIZE) / PAGE_SIZE;
+		offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+		pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
+			f, pkt->pages[p], offset);
+		if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
+			BUG();
+
+		atomic_inc(&pkt->io_wait);
+		pkt_queue_bio(pd, bio);
+		frames_read++;
+	}
+
+out_account:
+	pkt_dbg(2, pd, "need %d frames for zone %llx\n",
+		frames_read, (unsigned long long)pkt->sector);
+	pd->stats.pkt_started++;
+	pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
+}
+
+/*
+ * Find a packet matching zone, or the least recently used packet if
+ * there is no match.
+ */
+static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone)
+{
+	struct packet_data *pkt;
+
+	list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) {
+		if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) {
+			list_del_init(&pkt->list);
+			if (pkt->sector != zone)
+				pkt->cache_valid = 0;
+			return pkt;
+		}
+	}
+	BUG();
+	return NULL;
+}
+
+static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	if (pkt->cache_valid) {
+		list_add(&pkt->list, &pd->cdrw.pkt_free_list);
+	} else {
+		list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list);
+	}
+}
+
+static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state)
+{
+#if PACKET_DEBUG > 1
+	static const char *state_name[] = {
+		"IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
+	};
+	enum packet_data_state old_state = pkt->state;
+	pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
+		pkt->id, (unsigned long long)pkt->sector,
+		state_name[old_state], state_name[state]);
+#endif
+	pkt->state = state;
+}
+
+/*
+ * Scan the work queue to see if we can start a new packet.
+ * returns non-zero if any work was done.
+ */
+static int pkt_handle_queue(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *p;
+	struct bio *bio = NULL;
+	sector_t zone = 0; /* Suppress gcc warning */
+	struct pkt_rb_node *node, *first_node;
+	struct rb_node *n;
+
+	atomic_set(&pd->scan_queue, 0);
+
+	if (list_empty(&pd->cdrw.pkt_free_list)) {
+		pkt_dbg(2, pd, "no pkt\n");
+		return 0;
+	}
+
+	/*
+	 * Try to find a zone we are not already working on.
+	 */
+	spin_lock(&pd->lock);
+	first_node = pkt_rbtree_find(pd, pd->current_sector);
+	if (!first_node) {
+		n = rb_first(&pd->bio_queue);
+		if (n)
+			first_node = rb_entry(n, struct pkt_rb_node, rb_node);
+	}
+	node = first_node;
+	while (node) {
+		bio = node->bio;
+		zone = get_zone(bio->bi_iter.bi_sector, pd);
+		list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
+			if (p->sector == zone) {
+				bio = NULL;
+				goto try_next_bio;
+			}
+		}
+		break;
+try_next_bio:
+		node = pkt_rbtree_next(node);
+		if (!node) {
+			n = rb_first(&pd->bio_queue);
+			if (n)
+				node = rb_entry(n, struct pkt_rb_node, rb_node);
+		}
+		if (node == first_node)
+			node = NULL;
+	}
+	spin_unlock(&pd->lock);
+	if (!bio) {
+		pkt_dbg(2, pd, "no bio\n");
+		return 0;
+	}
+
+	pkt = pkt_get_packet_data(pd, zone);
+
+	pd->current_sector = zone + pd->settings.size;
+	pkt->sector = zone;
+	BUG_ON(pkt->frames != pd->settings.size >> 2);
+	pkt->write_size = 0;
+
+	/*
+	 * Scan work queue for bios in the same zone and link them
+	 * to this packet.
+	 */
+	spin_lock(&pd->lock);
+	pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
+	while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
+		bio = node->bio;
+		pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long)
+			get_zone(bio->bi_iter.bi_sector, pd));
+		if (get_zone(bio->bi_iter.bi_sector, pd) != zone)
+			break;
+		pkt_rbtree_erase(pd, node);
+		spin_lock(&pkt->lock);
+		bio_list_add(&pkt->orig_bios, bio);
+		pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE;
+		spin_unlock(&pkt->lock);
+	}
+	/* check write congestion marks, and if bio_queue_size is
+	 * below, wake up any waiters
+	 */
+	if (pd->congested &&
+	    pd->bio_queue_size <= pd->write_congestion_off) {
+		pd->congested = false;
+		wake_up_var(&pd->congested);
+	}
+	spin_unlock(&pd->lock);
+
+	pkt->sleep_time = max(PACKET_WAIT_TIME, 1);
+	pkt_set_state(pkt, PACKET_WAITING_STATE);
+	atomic_set(&pkt->run_sm, 1);
+
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_add(&pkt->list, &pd->cdrw.pkt_active_list);
+	spin_unlock(&pd->cdrw.active_list_lock);
+
+	return 1;
+}
+
+/**
+ * bio_list_copy_data - copy contents of data buffers from one chain of bios to
+ * another
+ * @src: source bio list
+ * @dst: destination bio list
+ *
+ * Stops when it reaches the end of either the @src list or @dst list - that is,
+ * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of
+ * bios).
+ */
+static void bio_list_copy_data(struct bio *dst, struct bio *src)
+{
+	struct bvec_iter src_iter = src->bi_iter;
+	struct bvec_iter dst_iter = dst->bi_iter;
+
+	while (1) {
+		if (!src_iter.bi_size) {
+			src = src->bi_next;
+			if (!src)
+				break;
+
+			src_iter = src->bi_iter;
+		}
+
+		if (!dst_iter.bi_size) {
+			dst = dst->bi_next;
+			if (!dst)
+				break;
+
+			dst_iter = dst->bi_iter;
+		}
+
+		bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
+	}
+}
+
+/*
+ * Assemble a bio to write one packet and queue the bio for processing
+ * by the underlying block device.
+ */
+static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	int f;
+
+	bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames,
+		 REQ_OP_WRITE);
+	pkt->w_bio->bi_iter.bi_sector = pkt->sector;
+	pkt->w_bio->bi_end_io = pkt_end_io_packet_write;
+	pkt->w_bio->bi_private = pkt;
+
+	/* XXX: locking? */
+	for (f = 0; f < pkt->frames; f++) {
+		struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE];
+		unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
+
+		if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset))
+			BUG();
+	}
+	pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
+
+	/*
+	 * Fill-in bvec with data from orig_bios.
+	 */
+	spin_lock(&pkt->lock);
+	bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head);
+
+	pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
+	spin_unlock(&pkt->lock);
+
+	pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
+		pkt->write_size, (unsigned long long)pkt->sector);
+
+	if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames))
+		pkt->cache_valid = 1;
+	else
+		pkt->cache_valid = 0;
+
+	/* Start the write request */
+	atomic_set(&pkt->io_wait, 1);
+	pkt_queue_bio(pd, pkt->w_bio);
+}
+
+static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status)
+{
+	struct bio *bio;
+
+	if (status)
+		pkt->cache_valid = 0;
+
+	/* Finish all bios corresponding to this packet */
+	while ((bio = bio_list_pop(&pkt->orig_bios))) {
+		bio->bi_status = status;
+		bio_endio(bio);
+	}
+}
+
+static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt)
+{
+	pkt_dbg(2, pd, "pkt %d\n", pkt->id);
+
+	for (;;) {
+		switch (pkt->state) {
+		case PACKET_WAITING_STATE:
+			if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0))
+				return;
+
+			pkt->sleep_time = 0;
+			pkt_gather_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_READ_WAIT_STATE);
+			break;
+
+		case PACKET_READ_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (atomic_read(&pkt->io_errors) > 0) {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			} else {
+				pkt_start_write(pd, pkt);
+			}
+			break;
+
+		case PACKET_WRITE_WAIT_STATE:
+			if (atomic_read(&pkt->io_wait) > 0)
+				return;
+
+			if (!pkt->w_bio->bi_status) {
+				pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			} else {
+				pkt_set_state(pkt, PACKET_RECOVERY_STATE);
+			}
+			break;
+
+		case PACKET_RECOVERY_STATE:
+			pkt_dbg(2, pd, "No recovery possible\n");
+			pkt_set_state(pkt, PACKET_FINISHED_STATE);
+			break;
+
+		case PACKET_FINISHED_STATE:
+			pkt_finish_packet(pkt, pkt->w_bio->bi_status);
+			return;
+
+		default:
+			BUG();
+			break;
+		}
+	}
+}
+
+static void pkt_handle_packets(struct pktcdvd_device *pd)
+{
+	struct packet_data *pkt, *next;
+
+	/*
+	 * Run state machine for active packets
+	 */
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		if (atomic_read(&pkt->run_sm) > 0) {
+			atomic_set(&pkt->run_sm, 0);
+			pkt_run_state_machine(pd, pkt);
+		}
+	}
+
+	/*
+	 * Move no longer active packets to the free list
+	 */
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) {
+		if (pkt->state == PACKET_FINISHED_STATE) {
+			list_del(&pkt->list);
+			pkt_put_packet_data(pd, pkt);
+			pkt_set_state(pkt, PACKET_IDLE_STATE);
+			atomic_set(&pd->scan_queue, 1);
+		}
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+static void pkt_count_states(struct pktcdvd_device *pd, int *states)
+{
+	struct packet_data *pkt;
+	int i;
+
+	for (i = 0; i < PACKET_NUM_STATES; i++)
+		states[i] = 0;
+
+	spin_lock(&pd->cdrw.active_list_lock);
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		states[pkt->state]++;
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+}
+
+/*
+ * kcdrwd is woken up when writes have been queued for one of our
+ * registered devices
+ */
+static int kcdrwd(void *foobar)
+{
+	struct pktcdvd_device *pd = foobar;
+	struct packet_data *pkt;
+	long min_sleep_time, residue;
+
+	set_user_nice(current, MIN_NICE);
+	set_freezable();
+
+	for (;;) {
+		DECLARE_WAITQUEUE(wait, current);
+
+		/*
+		 * Wait until there is something to do
+		 */
+		add_wait_queue(&pd->wqueue, &wait);
+		for (;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+
+			/* Check if we need to run pkt_handle_queue */
+			if (atomic_read(&pd->scan_queue) > 0)
+				goto work_to_do;
+
+			/* Check if we need to run the state machine for some packet */
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (atomic_read(&pkt->run_sm) > 0)
+					goto work_to_do;
+			}
+
+			/* Check if we need to process the iosched queues */
+			if (atomic_read(&pd->iosched.attention) != 0)
+				goto work_to_do;
+
+			/* Otherwise, go to sleep */
+			if (PACKET_DEBUG > 1) {
+				int states[PACKET_NUM_STATES];
+				pkt_count_states(pd, states);
+				pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+					states[0], states[1], states[2],
+					states[3], states[4], states[5]);
+			}
+
+			min_sleep_time = MAX_SCHEDULE_TIMEOUT;
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (pkt->sleep_time && pkt->sleep_time < min_sleep_time)
+					min_sleep_time = pkt->sleep_time;
+			}
+
+			pkt_dbg(2, pd, "sleeping\n");
+			residue = schedule_timeout(min_sleep_time);
+			pkt_dbg(2, pd, "wake up\n");
+
+			/* make swsusp happy with our thread */
+			try_to_freeze();
+
+			list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+				if (!pkt->sleep_time)
+					continue;
+				pkt->sleep_time -= min_sleep_time - residue;
+				if (pkt->sleep_time <= 0) {
+					pkt->sleep_time = 0;
+					atomic_inc(&pkt->run_sm);
+				}
+			}
+
+			if (kthread_should_stop())
+				break;
+		}
+work_to_do:
+		set_current_state(TASK_RUNNING);
+		remove_wait_queue(&pd->wqueue, &wait);
+
+		if (kthread_should_stop())
+			break;
+
+		/*
+		 * if pkt_handle_queue returns true, we can queue
+		 * another request.
+		 */
+		while (pkt_handle_queue(pd))
+			;
+
+		/*
+		 * Handle packet state machine
+		 */
+		pkt_handle_packets(pd);
+
+		/*
+		 * Handle iosched queues
+		 */
+		pkt_iosched_process_queue(pd);
+	}
+
+	return 0;
+}
+
+static void pkt_print_settings(struct pktcdvd_device *pd)
+{
+	pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
+		 pd->settings.fp ? "Fixed" : "Variable",
+		 pd->settings.size >> 2,
+		 pd->settings.block_mode == 8 ? '1' : '2');
+}
+
+static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
+{
+	memset(cgc->cmd, 0, sizeof(cgc->cmd));
+
+	cgc->cmd[0] = GPCMD_MODE_SENSE_10;
+	cgc->cmd[2] = page_code | (page_control << 6);
+	cgc->cmd[7] = cgc->buflen >> 8;
+	cgc->cmd[8] = cgc->buflen & 0xff;
+	cgc->data_direction = CGC_DATA_READ;
+	return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc)
+{
+	memset(cgc->cmd, 0, sizeof(cgc->cmd));
+	memset(cgc->buffer, 0, 2);
+	cgc->cmd[0] = GPCMD_MODE_SELECT_10;
+	cgc->cmd[1] = 0x10;		/* PF */
+	cgc->cmd[7] = cgc->buflen >> 8;
+	cgc->cmd[8] = cgc->buflen & 0xff;
+	cgc->data_direction = CGC_DATA_WRITE;
+	return pkt_generic_packet(pd, cgc);
+}
+
+static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di)
+{
+	struct packet_command cgc;
+	int ret;
+
+	/* set up command and get the disc info */
+	init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_DISC_INFO;
+	cgc.cmd[8] = cgc.buflen = 2;
+	cgc.quiet = 1;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		return ret;
+
+	/* not all drives have the same disc_info length, so requeue
+	 * packet with the length the drive tells us it can supply
+	 */
+	cgc.buflen = be16_to_cpu(di->disc_information_length) +
+		     sizeof(di->disc_information_length);
+
+	if (cgc.buflen > sizeof(disc_information))
+		cgc.buflen = sizeof(disc_information);
+
+	cgc.cmd[8] = cgc.buflen;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti)
+{
+	struct packet_command cgc;
+	int ret;
+
+	init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO;
+	cgc.cmd[1] = type & 3;
+	cgc.cmd[4] = (track & 0xff00) >> 8;
+	cgc.cmd[5] = track & 0xff;
+	cgc.cmd[8] = 8;
+	cgc.quiet = 1;
+
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		return ret;
+
+	cgc.buflen = be16_to_cpu(ti->track_information_length) +
+		     sizeof(ti->track_information_length);
+
+	if (cgc.buflen > sizeof(track_information))
+		cgc.buflen = sizeof(track_information);
+
+	cgc.cmd[8] = cgc.buflen;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
+						long *last_written)
+{
+	disc_information di;
+	track_information ti;
+	__u32 last_track;
+	int ret;
+
+	ret = pkt_get_disc_info(pd, &di);
+	if (ret)
+		return ret;
+
+	last_track = (di.last_track_msb << 8) | di.last_track_lsb;
+	ret = pkt_get_track_info(pd, last_track, 1, &ti);
+	if (ret)
+		return ret;
+
+	/* if this track is blank, try the previous. */
+	if (ti.blank) {
+		last_track--;
+		ret = pkt_get_track_info(pd, last_track, 1, &ti);
+		if (ret)
+			return ret;
+	}
+
+	/* if last recorded field is valid, return it. */
+	if (ti.lra_v) {
+		*last_written = be32_to_cpu(ti.last_rec_address);
+	} else {
+		/* make it up instead */
+		*last_written = be32_to_cpu(ti.track_start) +
+				be32_to_cpu(ti.track_size);
+		if (ti.free_blocks)
+			*last_written -= (be32_to_cpu(ti.free_blocks) + 7);
+	}
+	return 0;
+}
+
+/*
+ * write mode select package based on pd->settings
+ */
+static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	write_param_page *wp;
+	char buffer[128];
+	int ret, size;
+
+	/* doesn't apply to DVD+RW or DVD-RAM */
+	if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12))
+		return 0;
+
+	memset(buffer, 0, sizeof(buffer));
+	init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff));
+	pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff);
+	if (size > sizeof(buffer))
+		size = sizeof(buffer);
+
+	/*
+	 * now get it all
+	 */
+	init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	/*
+	 * write page is offset header + block descriptor length
+	 */
+	wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset];
+
+	wp->fp = pd->settings.fp;
+	wp->track_mode = pd->settings.track_mode;
+	wp->write_type = pd->settings.write_type;
+	wp->data_block_type = pd->settings.block_mode;
+
+	wp->multi_session = 0;
+
+#ifdef PACKET_USE_LS
+	wp->link_size = 7;
+	wp->ls_v = 1;
+#endif
+
+	if (wp->data_block_type == PACKET_BLOCK_MODE1) {
+		wp->session_format = 0;
+		wp->subhdr2 = 0x20;
+	} else if (wp->data_block_type == PACKET_BLOCK_MODE2) {
+		wp->session_format = 0x20;
+		wp->subhdr2 = 8;
+#if 0
+		wp->mcn[0] = 0x80;
+		memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1);
+#endif
+	} else {
+		/*
+		 * paranoia
+		 */
+		pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
+		return 1;
+	}
+	wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
+
+	cgc.buflen = cgc.cmd[8] = size;
+	ret = pkt_mode_select(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	pkt_print_settings(pd);
+	return 0;
+}
+
+/*
+ * 1 -- we can write to this track, 0 -- we can't
+ */
+static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
+{
+	switch (pd->mmc3_profile) {
+		case 0x1a: /* DVD+RW */
+		case 0x12: /* DVD-RAM */
+			/* The track is always writable on DVD+RW/DVD-RAM */
+			return 1;
+		default:
+			break;
+	}
+
+	if (!ti->packet || !ti->fp)
+		return 0;
+
+	/*
+	 * "good" settings as per Mt Fuji.
+	 */
+	if (ti->rt == 0 && ti->blank == 0)
+		return 1;
+
+	if (ti->rt == 0 && ti->blank == 1)
+		return 1;
+
+	if (ti->rt == 1 && ti->blank == 0)
+		return 1;
+
+	pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+	return 0;
+}
+
+/*
+ * 1 -- we can write to this disc, 0 -- we can't
+ */
+static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
+{
+	switch (pd->mmc3_profile) {
+		case 0x0a: /* CD-RW */
+		case 0xffff: /* MMC3 not supported */
+			break;
+		case 0x1a: /* DVD+RW */
+		case 0x13: /* DVD-RW */
+		case 0x12: /* DVD-RAM */
+			return 1;
+		default:
+			pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
+				pd->mmc3_profile);
+			return 0;
+	}
+
+	/*
+	 * for disc type 0xff we should probably reserve a new track.
+	 * but i'm not sure, should we leave this to user apps? probably.
+	 */
+	if (di->disc_type == 0xff) {
+		pkt_notice(pd, "unknown disc - no track?\n");
+		return 0;
+	}
+
+	if (di->disc_type != 0x20 && di->disc_type != 0) {
+		pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
+		return 0;
+	}
+
+	if (di->erasable == 0) {
+		pkt_notice(pd, "disc not erasable\n");
+		return 0;
+	}
+
+	if (di->border_status == PACKET_SESSION_RESERVED) {
+		pkt_err(pd, "can't write to last track (reserved)\n");
+		return 0;
+	}
+
+	return 1;
+}
+
+static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	unsigned char buf[12];
+	disc_information di;
+	track_information ti;
+	int ret, track;
+
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+	cgc.cmd[0] = GPCMD_GET_CONFIGURATION;
+	cgc.cmd[8] = 8;
+	ret = pkt_generic_packet(pd, &cgc);
+	pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7];
+
+	memset(&di, 0, sizeof(disc_information));
+	memset(&ti, 0, sizeof(track_information));
+
+	ret = pkt_get_disc_info(pd, &di);
+	if (ret) {
+		pkt_err(pd, "failed get_disc\n");
+		return ret;
+	}
+
+	if (!pkt_writable_disc(pd, &di))
+		return -EROFS;
+
+	pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR;
+
+	track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
+	ret = pkt_get_track_info(pd, track, 1, &ti);
+	if (ret) {
+		pkt_err(pd, "failed get_track\n");
+		return ret;
+	}
+
+	if (!pkt_writable_track(pd, &ti)) {
+		pkt_err(pd, "can't write to this track\n");
+		return -EROFS;
+	}
+
+	/*
+	 * we keep packet size in 512 byte units, makes it easier to
+	 * deal with request calculations.
+	 */
+	pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
+	if (pd->settings.size == 0) {
+		pkt_notice(pd, "detected zero packet size!\n");
+		return -ENXIO;
+	}
+	if (pd->settings.size > PACKET_MAX_SECTORS) {
+		pkt_err(pd, "packet size is too big\n");
+		return -EROFS;
+	}
+	pd->settings.fp = ti.fp;
+	pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1);
+
+	if (ti.nwa_v) {
+		pd->nwa = be32_to_cpu(ti.next_writable);
+		set_bit(PACKET_NWA_VALID, &pd->flags);
+	}
+
+	/*
+	 * in theory we could use lra on -RW media as well and just zero
+	 * blocks that haven't been written yet, but in practice that
+	 * is just a no-go. we'll use that for -R, naturally.
+	 */
+	if (ti.lra_v) {
+		pd->lra = be32_to_cpu(ti.last_rec_address);
+		set_bit(PACKET_LRA_VALID, &pd->flags);
+	} else {
+		pd->lra = 0xffffffff;
+		set_bit(PACKET_LRA_VALID, &pd->flags);
+	}
+
+	/*
+	 * fine for now
+	 */
+	pd->settings.link_loss = 7;
+	pd->settings.write_type = 0;	/* packet */
+	pd->settings.track_mode = ti.track_mode;
+
+	/*
+	 * mode1 or mode2 disc
+	 */
+	switch (ti.data_mode) {
+		case PACKET_MODE1:
+			pd->settings.block_mode = PACKET_BLOCK_MODE1;
+			break;
+		case PACKET_MODE2:
+			pd->settings.block_mode = PACKET_BLOCK_MODE2;
+			break;
+		default:
+			pkt_err(pd, "unknown data mode\n");
+			return -EROFS;
+	}
+	return 0;
+}
+
+/*
+ * enable/disable write caching on drive
+ */
+static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
+						int set)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[64];
+	int ret;
+
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.buflen = pd->mode_offset + 12;
+
+	/*
+	 * caching mode page might not be there, so quiet this command
+	 */
+	cgc.quiet = 1;
+
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0);
+	if (ret)
+		return ret;
+
+	buf[pd->mode_offset + 10] |= (!!set << 2);
+
+	cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
+	ret = pkt_mode_select(pd, &cgc);
+	if (ret) {
+		pkt_err(pd, "write caching control failed\n");
+		pkt_dump_sense(pd, &cgc);
+	} else if (!ret && set)
+		pkt_notice(pd, "enabled write caching\n");
+	return ret;
+}
+
+static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag)
+{
+	struct packet_command cgc;
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL;
+	cgc.cmd[4] = lockflag ? 1 : 0;
+	return pkt_generic_packet(pd, &cgc);
+}
+
+/*
+ * Returns drive maximum write speed
+ */
+static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
+						unsigned *write_speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[256+18];
+	unsigned char *cap_buf;
+	int ret, offset;
+
+	cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset];
+	init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN);
+	cgc.sshdr = &sshdr;
+
+	ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+	if (ret) {
+		cgc.buflen = pd->mode_offset + cap_buf[1] + 2 +
+			     sizeof(struct mode_page_header);
+		ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
+		if (ret) {
+			pkt_dump_sense(pd, &cgc);
+			return ret;
+		}
+	}
+
+	offset = 20;			    /* Obsoleted field, used by older drives */
+	if (cap_buf[1] >= 28)
+		offset = 28;		    /* Current write speed selected */
+	if (cap_buf[1] >= 30) {
+		/* If the drive reports at least one "Logical Unit Write
+		 * Speed Performance Descriptor Block", use the information
+		 * in the first block. (contains the highest speed)
+		 */
+		int num_spdb = (cap_buf[30] << 8) + cap_buf[31];
+		if (num_spdb > 0)
+			offset = 34;
+	}
+
+	*write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1];
+	return 0;
+}
+
+/* These tables from cdrecord - I don't have orange book */
+/* standard speed CD-RW (1-4x) */
+static char clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* high speed CD-RW (-10x) */
+static char hs_clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+/* ultra high speed CD-RW */
+static char us_clv_to_speed[16] = {
+	/* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 */
+	   0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0
+};
+
+/*
+ * reads the maximum media speed from ATIP
+ */
+static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
+						unsigned *speed)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	unsigned char buf[64];
+	unsigned int size, st, sp;
+	int ret;
+
+	init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4; /* READ ATIP */
+	cgc.cmd[8] = 2;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+	size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
+	if (size > sizeof(buf))
+		size = sizeof(buf);
+
+	init_cdrom_command(&cgc, buf, size, CGC_DATA_READ);
+	cgc.sshdr = &sshdr;
+	cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP;
+	cgc.cmd[1] = 2;
+	cgc.cmd[2] = 4;
+	cgc.cmd[8] = size;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret) {
+		pkt_dump_sense(pd, &cgc);
+		return ret;
+	}
+
+	if (!(buf[6] & 0x40)) {
+		pkt_notice(pd, "disc type is not CD-RW\n");
+		return 1;
+	}
+	if (!(buf[6] & 0x4)) {
+		pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
+		return 1;
+	}
+
+	st = (buf[6] >> 3) & 0x7; /* disc sub-type */
+
+	sp = buf[16] & 0xf; /* max speed from ATIP A1 field */
+
+	/* Info from cdrecord */
+	switch (st) {
+		case 0: /* standard speed */
+			*speed = clv_to_speed[sp];
+			break;
+		case 1: /* high speed */
+			*speed = hs_clv_to_speed[sp];
+			break;
+		case 2: /* ultra high speed */
+			*speed = us_clv_to_speed[sp];
+			break;
+		default:
+			pkt_notice(pd, "unknown disc sub-type %d\n", st);
+			return 1;
+	}
+	if (*speed) {
+		pkt_info(pd, "maximum media speed: %d\n", *speed);
+		return 0;
+	} else {
+		pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
+		return 1;
+	}
+}
+
+static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
+{
+	struct packet_command cgc;
+	struct scsi_sense_hdr sshdr;
+	int ret;
+
+	pkt_dbg(2, pd, "Performing OPC\n");
+
+	init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
+	cgc.sshdr = &sshdr;
+	cgc.timeout = 60*HZ;
+	cgc.cmd[0] = GPCMD_SEND_OPC;
+	cgc.cmd[1] = 1;
+	ret = pkt_generic_packet(pd, &cgc);
+	if (ret)
+		pkt_dump_sense(pd, &cgc);
+	return ret;
+}
+
+static int pkt_open_write(struct pktcdvd_device *pd)
+{
+	int ret;
+	unsigned int write_speed, media_write_speed, read_speed;
+
+	ret = pkt_probe_settings(pd);
+	if (ret) {
+		pkt_dbg(2, pd, "failed probe\n");
+		return ret;
+	}
+
+	ret = pkt_set_write_settings(pd);
+	if (ret) {
+		pkt_dbg(1, pd, "failed saving write settings\n");
+		return -EIO;
+	}
+
+	pkt_write_caching(pd, USE_WCACHING);
+
+	ret = pkt_get_max_speed(pd, &write_speed);
+	if (ret)
+		write_speed = 16 * 177;
+	switch (pd->mmc3_profile) {
+		case 0x13: /* DVD-RW */
+		case 0x1a: /* DVD+RW */
+		case 0x12: /* DVD-RAM */
+			pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
+			break;
+		default:
+			ret = pkt_media_speed(pd, &media_write_speed);
+			if (ret)
+				media_write_speed = 16;
+			write_speed = min(write_speed, media_write_speed * 177);
+			pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
+			break;
+	}
+	read_speed = write_speed;
+
+	ret = pkt_set_speed(pd, write_speed, read_speed);
+	if (ret) {
+		pkt_dbg(1, pd, "couldn't set write speed\n");
+		return -EIO;
+	}
+	pd->write_speed = write_speed;
+	pd->read_speed = read_speed;
+
+	ret = pkt_perform_opc(pd);
+	if (ret) {
+		pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
+	}
+
+	return 0;
+}
+
+/*
+ * called at open time.
+ */
+static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
+{
+	int ret;
+	long lba;
+	struct request_queue *q;
+	struct block_device *bdev;
+
+	/*
+	 * We need to re-open the cdrom device without O_NONBLOCK to be able
+	 * to read/write from/to it. It is already opened in O_NONBLOCK mode
+	 * so open should not fail.
+	 */
+	bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd);
+	if (IS_ERR(bdev)) {
+		ret = PTR_ERR(bdev);
+		goto out;
+	}
+
+	ret = pkt_get_last_written(pd, &lba);
+	if (ret) {
+		pkt_err(pd, "pkt_get_last_written failed\n");
+		goto out_putdev;
+	}
+
+	set_capacity(pd->disk, lba << 2);
+	set_capacity_and_notify(pd->bdev->bd_disk, lba << 2);
+
+	q = bdev_get_queue(pd->bdev);
+	if (write) {
+		ret = pkt_open_write(pd);
+		if (ret)
+			goto out_putdev;
+		/*
+		 * Some CDRW drives can not handle writes larger than one packet,
+		 * even if the size is a multiple of the packet size.
+		 */
+		blk_queue_max_hw_sectors(q, pd->settings.size);
+		set_bit(PACKET_WRITABLE, &pd->flags);
+	} else {
+		pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+		clear_bit(PACKET_WRITABLE, &pd->flags);
+	}
+
+	ret = pkt_set_segment_merging(pd, q);
+	if (ret)
+		goto out_putdev;
+
+	if (write) {
+		if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
+			pkt_err(pd, "not enough memory for buffers\n");
+			ret = -ENOMEM;
+			goto out_putdev;
+		}
+		pkt_info(pd, "%lukB available on disc\n", lba << 1);
+	}
+
+	return 0;
+
+out_putdev:
+	blkdev_put(bdev, FMODE_READ | FMODE_EXCL);
+out:
+	return ret;
+}
+
+/*
+ * called when the device is closed. makes sure that the device flushes
+ * the internal cache before we close.
+ */
+static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
+{
+	if (flush && pkt_flush_cache(pd))
+		pkt_dbg(1, pd, "not flushing cache\n");
+
+	pkt_lock_door(pd, 0);
+
+	pkt_set_speed(pd, MAX_SPEED, MAX_SPEED);
+	blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL);
+
+	pkt_shrink_pktlist(pd);
+}
+
+static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor)
+{
+	if (dev_minor >= MAX_WRITERS)
+		return NULL;
+
+	dev_minor = array_index_nospec(dev_minor, MAX_WRITERS);
+	return pkt_devs[dev_minor];
+}
+
+static int pkt_open(struct block_device *bdev, fmode_t mode)
+{
+	struct pktcdvd_device *pd = NULL;
+	int ret;
+
+	mutex_lock(&pktcdvd_mutex);
+	mutex_lock(&ctl_mutex);
+	pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
+	if (!pd) {
+		ret = -ENODEV;
+		goto out;
+	}
+	BUG_ON(pd->refcnt < 0);
+
+	pd->refcnt++;
+	if (pd->refcnt > 1) {
+		if ((mode & FMODE_WRITE) &&
+		    !test_bit(PACKET_WRITABLE, &pd->flags)) {
+			ret = -EBUSY;
+			goto out_dec;
+		}
+	} else {
+		ret = pkt_open_dev(pd, mode & FMODE_WRITE);
+		if (ret)
+			goto out_dec;
+		/*
+		 * needed here as well, since ext2 (among others) may change
+		 * the blocksize at mount time
+		 */
+		set_blocksize(bdev, CD_FRAMESIZE);
+	}
+
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+	return 0;
+
+out_dec:
+	pd->refcnt--;
+out:
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+	return ret;
+}
+
+static void pkt_close(struct gendisk *disk, fmode_t mode)
+{
+	struct pktcdvd_device *pd = disk->private_data;
+
+	mutex_lock(&pktcdvd_mutex);
+	mutex_lock(&ctl_mutex);
+	pd->refcnt--;
+	BUG_ON(pd->refcnt < 0);
+	if (pd->refcnt == 0) {
+		int flush = test_bit(PACKET_WRITABLE, &pd->flags);
+		pkt_release_dev(pd, flush);
+	}
+	mutex_unlock(&ctl_mutex);
+	mutex_unlock(&pktcdvd_mutex);
+}
+
+
+static void pkt_end_io_read_cloned(struct bio *bio)
+{
+	struct packet_stacked_data *psd = bio->bi_private;
+	struct pktcdvd_device *pd = psd->pd;
+
+	psd->bio->bi_status = bio->bi_status;
+	bio_put(bio);
+	bio_endio(psd->bio);
+	mempool_free(psd, &psd_pool);
+	pkt_bio_finished(pd);
+}
+
+static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio)
+{
+	struct bio *cloned_bio =
+		bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set);
+	struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO);
+
+	psd->pd = pd;
+	psd->bio = bio;
+	cloned_bio->bi_private = psd;
+	cloned_bio->bi_end_io = pkt_end_io_read_cloned;
+	pd->stats.secs_r += bio_sectors(bio);
+	pkt_queue_bio(pd, cloned_bio);
+}
+
+static void pkt_make_request_write(struct request_queue *q, struct bio *bio)
+{
+	struct pktcdvd_device *pd = q->queuedata;
+	sector_t zone;
+	struct packet_data *pkt;
+	int was_empty, blocked_bio;
+	struct pkt_rb_node *node;
+
+	zone = get_zone(bio->bi_iter.bi_sector, pd);
+
+	/*
+	 * If we find a matching packet in state WAITING or READ_WAIT, we can
+	 * just append this bio to that packet.
+	 */
+	spin_lock(&pd->cdrw.active_list_lock);
+	blocked_bio = 0;
+	list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) {
+		if (pkt->sector == zone) {
+			spin_lock(&pkt->lock);
+			if ((pkt->state == PACKET_WAITING_STATE) ||
+			    (pkt->state == PACKET_READ_WAIT_STATE)) {
+				bio_list_add(&pkt->orig_bios, bio);
+				pkt->write_size +=
+					bio->bi_iter.bi_size / CD_FRAMESIZE;
+				if ((pkt->write_size >= pkt->frames) &&
+				    (pkt->state == PACKET_WAITING_STATE)) {
+					atomic_inc(&pkt->run_sm);
+					wake_up(&pd->wqueue);
+				}
+				spin_unlock(&pkt->lock);
+				spin_unlock(&pd->cdrw.active_list_lock);
+				return;
+			} else {
+				blocked_bio = 1;
+			}
+			spin_unlock(&pkt->lock);
+		}
+	}
+	spin_unlock(&pd->cdrw.active_list_lock);
+
+	/*
+	 * Test if there is enough room left in the bio work queue
+	 * (queue size >= congestion on mark).
+	 * If not, wait till the work queue size is below the congestion off mark.
+	 */
+	spin_lock(&pd->lock);
+	if (pd->write_congestion_on > 0
+	    && pd->bio_queue_size >= pd->write_congestion_on) {
+		struct wait_bit_queue_entry wqe;
+
+		init_wait_var_entry(&wqe, &pd->congested, 0);
+		for (;;) {
+			prepare_to_wait_event(__var_waitqueue(&pd->congested),
+					      &wqe.wq_entry,
+					      TASK_UNINTERRUPTIBLE);
+			if (pd->bio_queue_size <= pd->write_congestion_off)
+				break;
+			pd->congested = true;
+			spin_unlock(&pd->lock);
+			schedule();
+			spin_lock(&pd->lock);
+		}
+	}
+	spin_unlock(&pd->lock);
+
+	/*
+	 * No matching packet found. Store the bio in the work queue.
+	 */
+	node = mempool_alloc(&pd->rb_pool, GFP_NOIO);
+	node->bio = bio;
+	spin_lock(&pd->lock);
+	BUG_ON(pd->bio_queue_size < 0);
+	was_empty = (pd->bio_queue_size == 0);
+	pkt_rbtree_insert(pd, node);
+	spin_unlock(&pd->lock);
+
+	/*
+	 * Wake up the worker thread.
+	 */
+	atomic_set(&pd->scan_queue, 1);
+	if (was_empty) {
+		/* This wake_up is required for correct operation */
+		wake_up(&pd->wqueue);
+	} else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) {
+		/*
+		 * This wake up is not required for correct operation,
+		 * but improves performance in some cases.
+		 */
+		wake_up(&pd->wqueue);
+	}
+}
+
+static void pkt_submit_bio(struct bio *bio)
+{
+	struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata;
+	struct bio *split;
+
+	bio = bio_split_to_limits(bio);
+
+	pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
+		(unsigned long long)bio->bi_iter.bi_sector,
+		(unsigned long long)bio_end_sector(bio));
+
+	/*
+	 * Clone READ bios so we can have our own bi_end_io callback.
+	 */
+	if (bio_data_dir(bio) == READ) {
+		pkt_make_request_read(pd, bio);
+		return;
+	}
+
+	if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
+		pkt_notice(pd, "WRITE for ro device (%llu)\n",
+			   (unsigned long long)bio->bi_iter.bi_sector);
+		goto end_io;
+	}
+
+	if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) {
+		pkt_err(pd, "wrong bio size\n");
+		goto end_io;
+	}
+
+	do {
+		sector_t zone = get_zone(bio->bi_iter.bi_sector, pd);
+		sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd);
+
+		if (last_zone != zone) {
+			BUG_ON(last_zone != zone + pd->settings.size);
+
+			split = bio_split(bio, last_zone -
+					  bio->bi_iter.bi_sector,
+					  GFP_NOIO, &pkt_bio_set);
+			bio_chain(split, bio);
+		} else {
+			split = bio;
+		}
+
+		pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split);
+	} while (split != bio);
+
+	return;
+end_io:
+	bio_io_error(bio);
+}
+
+static void pkt_init_queue(struct pktcdvd_device *pd)
+{
+	struct request_queue *q = pd->disk->queue;
+
+	blk_queue_logical_block_size(q, CD_FRAMESIZE);
+	blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS);
+	q->queuedata = pd;
+}
+
+static int pkt_seq_show(struct seq_file *m, void *p)
+{
+	struct pktcdvd_device *pd = m->private;
+	char *msg;
+	int states[PACKET_NUM_STATES];
+
+	seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev);
+
+	seq_printf(m, "\nSettings:\n");
+	seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2);
+
+	if (pd->settings.write_type == 0)
+		msg = "Packet";
+	else
+		msg = "Unknown";
+	seq_printf(m, "\twrite type:\t\t%s\n", msg);
+
+	seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable");
+	seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss);
+
+	seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode);
+
+	if (pd->settings.block_mode == PACKET_BLOCK_MODE1)
+		msg = "Mode 1";
+	else if (pd->settings.block_mode == PACKET_BLOCK_MODE2)
+		msg = "Mode 2";
+	else
+		msg = "Unknown";
+	seq_printf(m, "\tblock mode:\t\t%s\n", msg);
+
+	seq_printf(m, "\nStatistics:\n");
+	seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started);
+	seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended);
+	seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1);
+	seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1);
+	seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1);
+
+	seq_printf(m, "\nMisc:\n");
+	seq_printf(m, "\treference count:\t%d\n", pd->refcnt);
+	seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags);
+	seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed);
+	seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed);
+	seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset);
+	seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset);
+
+	seq_printf(m, "\nQueue state:\n");
+	seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size);
+	seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios));
+	seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector);
+
+	pkt_count_states(pd, states);
+	seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+		   states[0], states[1], states[2], states[3], states[4], states[5]);
+
+	seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n",
+			pd->write_congestion_off,
+			pd->write_congestion_on);
+	return 0;
+}
+
+static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
+{
+	int i;
+	struct block_device *bdev;
+	struct scsi_device *sdev;
+
+	if (pd->pkt_dev == dev) {
+		pkt_err(pd, "recursive setup not allowed\n");
+		return -EBUSY;
+	}
+	for (i = 0; i < MAX_WRITERS; i++) {
+		struct pktcdvd_device *pd2 = pkt_devs[i];
+		if (!pd2)
+			continue;
+		if (pd2->bdev->bd_dev == dev) {
+			pkt_err(pd, "%pg already setup\n", pd2->bdev);
+			return -EBUSY;
+		}
+		if (pd2->pkt_dev == dev) {
+			pkt_err(pd, "can't chain pktcdvd devices\n");
+			return -EBUSY;
+		}
+	}
+
+	bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+	sdev = scsi_device_from_queue(bdev->bd_disk->queue);
+	if (!sdev) {
+		blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+		return -EINVAL;
+	}
+	put_device(&sdev->sdev_gendev);
+
+	/* This is safe, since we have a reference from open(). */
+	__module_get(THIS_MODULE);
+
+	pd->bdev = bdev;
+	set_blocksize(bdev, CD_FRAMESIZE);
+
+	pkt_init_queue(pd);
+
+	atomic_set(&pd->cdrw.pending_bios, 0);
+	pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
+	if (IS_ERR(pd->cdrw.thread)) {
+		pkt_err(pd, "can't start kernel thread\n");
+		goto out_mem;
+	}
+
+	proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd);
+	pkt_dbg(1, pd, "writer mapped to %pg\n", bdev);
+	return 0;
+
+out_mem:
+	blkdev_put(bdev, FMODE_READ | FMODE_NDELAY);
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
+	return -ENOMEM;
+}
+
+static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg)
+{
+	struct pktcdvd_device *pd = bdev->bd_disk->private_data;
+	int ret;
+
+	pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
+		cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+	mutex_lock(&pktcdvd_mutex);
+	switch (cmd) {
+	case CDROMEJECT:
+		/*
+		 * The door gets locked when the device is opened, so we
+		 * have to unlock it or else the eject command fails.
+		 */
+		if (pd->refcnt == 1)
+			pkt_lock_door(pd, 0);
+		fallthrough;
+	/*
+	 * forward selected CDROM ioctls to CD-ROM, for UDF
+	 */
+	case CDROMMULTISESSION:
+	case CDROMREADTOCENTRY:
+	case CDROM_LAST_WRITTEN:
+	case CDROM_SEND_PACKET:
+	case SCSI_IOCTL_SEND_COMMAND:
+		if (!bdev->bd_disk->fops->ioctl)
+			ret = -ENOTTY;
+		else
+			ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg);
+		break;
+	default:
+		pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
+		ret = -ENOTTY;
+	}
+	mutex_unlock(&pktcdvd_mutex);
+
+	return ret;
+}
+
+static unsigned int pkt_check_events(struct gendisk *disk,
+				     unsigned int clearing)
+{
+	struct pktcdvd_device *pd = disk->private_data;
+	struct gendisk *attached_disk;
+
+	if (!pd)
+		return 0;
+	if (!pd->bdev)
+		return 0;
+	attached_disk = pd->bdev->bd_disk;
+	if (!attached_disk || !attached_disk->fops->check_events)
+		return 0;
+	return attached_disk->fops->check_events(attached_disk, clearing);
+}
+
+static char *pkt_devnode(struct gendisk *disk, umode_t *mode)
+{
+	return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name);
+}
+
+static const struct block_device_operations pktcdvd_ops = {
+	.owner =		THIS_MODULE,
+	.submit_bio =		pkt_submit_bio,
+	.open =			pkt_open,
+	.release =		pkt_close,
+	.ioctl =		pkt_ioctl,
+	.compat_ioctl =		blkdev_compat_ptr_ioctl,
+	.check_events =		pkt_check_events,
+	.devnode =		pkt_devnode,
+};
+
+/*
+ * Set up mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
+{
+	int idx;
+	int ret = -ENOMEM;
+	struct pktcdvd_device *pd;
+	struct gendisk *disk;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	for (idx = 0; idx < MAX_WRITERS; idx++)
+		if (!pkt_devs[idx])
+			break;
+	if (idx == MAX_WRITERS) {
+		pr_err("max %d writers supported\n", MAX_WRITERS);
+		ret = -EBUSY;
+		goto out_mutex;
+	}
+
+	pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL);
+	if (!pd)
+		goto out_mutex;
+
+	ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE,
+					sizeof(struct pkt_rb_node));
+	if (ret)
+		goto out_mem;
+
+	INIT_LIST_HEAD(&pd->cdrw.pkt_free_list);
+	INIT_LIST_HEAD(&pd->cdrw.pkt_active_list);
+	spin_lock_init(&pd->cdrw.active_list_lock);
+
+	spin_lock_init(&pd->lock);
+	spin_lock_init(&pd->iosched.lock);
+	bio_list_init(&pd->iosched.read_queue);
+	bio_list_init(&pd->iosched.write_queue);
+	sprintf(pd->name, DRIVER_NAME"%d", idx);
+	init_waitqueue_head(&pd->wqueue);
+	pd->bio_queue = RB_ROOT;
+
+	pd->write_congestion_on  = write_congestion_on;
+	pd->write_congestion_off = write_congestion_off;
+
+	ret = -ENOMEM;
+	disk = blk_alloc_disk(NUMA_NO_NODE);
+	if (!disk)
+		goto out_mem;
+	pd->disk = disk;
+	disk->major = pktdev_major;
+	disk->first_minor = idx;
+	disk->minors = 1;
+	disk->fops = &pktcdvd_ops;
+	disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART;
+	strcpy(disk->disk_name, pd->name);
+	disk->private_data = pd;
+
+	pd->pkt_dev = MKDEV(pktdev_major, idx);
+	ret = pkt_new_dev(pd, dev);
+	if (ret)
+		goto out_mem2;
+
+	/* inherit events of the host device */
+	disk->events = pd->bdev->bd_disk->events;
+
+	ret = add_disk(disk);
+	if (ret)
+		goto out_mem2;
+
+	pkt_sysfs_dev_new(pd);
+	pkt_debugfs_dev_new(pd);
+
+	pkt_devs[idx] = pd;
+	if (pkt_dev)
+		*pkt_dev = pd->pkt_dev;
+
+	mutex_unlock(&ctl_mutex);
+	return 0;
+
+out_mem2:
+	put_disk(disk);
+out_mem:
+	mempool_exit(&pd->rb_pool);
+	kfree(pd);
+out_mutex:
+	mutex_unlock(&ctl_mutex);
+	pr_err("setup of pktcdvd device failed\n");
+	return ret;
+}
+
+/*
+ * Tear down mapping from pktcdvd device to CD-ROM device.
+ */
+static int pkt_remove_dev(dev_t pkt_dev)
+{
+	struct pktcdvd_device *pd;
+	int idx;
+	int ret = 0;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	for (idx = 0; idx < MAX_WRITERS; idx++) {
+		pd = pkt_devs[idx];
+		if (pd && (pd->pkt_dev == pkt_dev))
+			break;
+	}
+	if (idx == MAX_WRITERS) {
+		pr_debug("dev not setup\n");
+		ret = -ENXIO;
+		goto out;
+	}
+
+	if (pd->refcnt > 0) {
+		ret = -EBUSY;
+		goto out;
+	}
+	if (!IS_ERR(pd->cdrw.thread))
+		kthread_stop(pd->cdrw.thread);
+
+	pkt_devs[idx] = NULL;
+
+	pkt_debugfs_dev_remove(pd);
+	pkt_sysfs_dev_remove(pd);
+
+	blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
+
+	remove_proc_entry(pd->name, pkt_proc);
+	pkt_dbg(1, pd, "writer unmapped\n");
+
+	del_gendisk(pd->disk);
+	put_disk(pd->disk);
+
+	mempool_exit(&pd->rb_pool);
+	kfree(pd);
+
+	/* This is safe: open() is still holding a reference. */
+	module_put(THIS_MODULE);
+
+out:
+	mutex_unlock(&ctl_mutex);
+	return ret;
+}
+
+static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd)
+{
+	struct pktcdvd_device *pd;
+
+	mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING);
+
+	pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index);
+	if (pd) {
+		ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev);
+		ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev);
+	} else {
+		ctrl_cmd->dev = 0;
+		ctrl_cmd->pkt_dev = 0;
+	}
+	ctrl_cmd->num_devices = MAX_WRITERS;
+
+	mutex_unlock(&ctl_mutex);
+}
+
+static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = (void __user *)arg;
+	struct pkt_ctrl_command ctrl_cmd;
+	int ret = 0;
+	dev_t pkt_dev = 0;
+
+	if (cmd != PACKET_CTRL_CMD)
+		return -ENOTTY;
+
+	if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command)))
+		return -EFAULT;
+
+	switch (ctrl_cmd.command) {
+	case PKT_CTRL_CMD_SETUP:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev);
+		ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev);
+		break;
+	case PKT_CTRL_CMD_TEARDOWN:
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+		ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev));
+		break;
+	case PKT_CTRL_CMD_STATUS:
+		pkt_get_status(&ctrl_cmd);
+		break;
+	default:
+		return -ENOTTY;
+	}
+
+	if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command)))
+		return -EFAULT;
+	return ret;
+}
+
+#ifdef CONFIG_COMPAT
+static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg));
+}
+#endif
+
+static const struct file_operations pkt_ctl_fops = {
+	.open		= nonseekable_open,
+	.unlocked_ioctl	= pkt_ctl_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= pkt_ctl_compat_ioctl,
+#endif
+	.owner		= THIS_MODULE,
+	.llseek		= no_llseek,
+};
+
+static struct miscdevice pkt_misc = {
+	.minor 		= MISC_DYNAMIC_MINOR,
+	.name  		= DRIVER_NAME,
+	.nodename	= "pktcdvd/control",
+	.fops  		= &pkt_ctl_fops
+};
+
+static int __init pkt_init(void)
+{
+	int ret;
+
+	mutex_init(&ctl_mutex);
+
+	ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE,
+				    sizeof(struct packet_stacked_data));
+	if (ret)
+		return ret;
+	ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0);
+	if (ret) {
+		mempool_exit(&psd_pool);
+		return ret;
+	}
+
+	ret = register_blkdev(pktdev_major, DRIVER_NAME);
+	if (ret < 0) {
+		pr_err("unable to register block device\n");
+		goto out2;
+	}
+	if (!pktdev_major)
+		pktdev_major = ret;
+
+	ret = pkt_sysfs_init();
+	if (ret)
+		goto out;
+
+	pkt_debugfs_init();
+
+	ret = misc_register(&pkt_misc);
+	if (ret) {
+		pr_err("unable to register misc device\n");
+		goto out_misc;
+	}
+
+	pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL);
+
+	return 0;
+
+out_misc:
+	pkt_debugfs_cleanup();
+	pkt_sysfs_cleanup();
+out:
+	unregister_blkdev(pktdev_major, DRIVER_NAME);
+out2:
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
+	return ret;
+}
+
+static void __exit pkt_exit(void)
+{
+	remove_proc_entry("driver/"DRIVER_NAME, NULL);
+	misc_deregister(&pkt_misc);
+
+	pkt_debugfs_cleanup();
+	pkt_sysfs_cleanup();
+
+	unregister_blkdev(pktdev_major, DRIVER_NAME);
+	mempool_exit(&psd_pool);
+	bioset_exit(&pkt_bio_set);
+}
+
+MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives");
+MODULE_AUTHOR("Jens Axboe <axboe@suse.de>");
+MODULE_LICENSE("GPL");
+
+module_init(pkt_init);
+module_exit(pkt_exit);
diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h
new file mode 100644
index 000000000000..f9c5ac80d59b
--- /dev/null
+++ b/include/linux/pktcdvd.h
@@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef __PKTCDVD_H
+#define __PKTCDVD_H
+
+#include <linux/blkdev.h>
+#include <linux/completion.h>
+#include <linux/cdrom.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/mempool.h>
+#include <uapi/linux/pktcdvd.h>
+
+/* default bio write queue congestion marks */
+#define PKT_WRITE_CONGESTION_ON    10000
+#define PKT_WRITE_CONGESTION_OFF   9000
+
+
+struct packet_settings
+{
+	__u32			size;		/* packet size in (512 byte) sectors */
+	__u8			fp;		/* fixed packets */
+	__u8			link_loss;	/* the rest is specified
+						 * as per Mt Fuji */
+	__u8			write_type;
+	__u8			track_mode;
+	__u8			block_mode;
+};
+
+/*
+ * Very crude stats for now
+ */
+struct packet_stats
+{
+	unsigned long		pkt_started;
+	unsigned long		pkt_ended;
+	unsigned long		secs_w;
+	unsigned long		secs_rg;
+	unsigned long		secs_r;
+};
+
+struct packet_cdrw
+{
+	struct list_head	pkt_free_list;
+	struct list_head	pkt_active_list;
+	spinlock_t		active_list_lock; /* Serialize access to pkt_active_list */
+	struct task_struct	*thread;
+	atomic_t		pending_bios;
+};
+
+/*
+ * Switch to high speed reading after reading this many kilobytes
+ * with no interspersed writes.
+ */
+#define HI_SPEED_SWITCH 512
+
+struct packet_iosched
+{
+	atomic_t		attention;	/* Set to non-zero when queue processing is needed */
+	int			writing;	/* Non-zero when writing, zero when reading */
+	spinlock_t		lock;		/* Protecting read/write queue manipulations */
+	struct bio_list		read_queue;
+	struct bio_list		write_queue;
+	sector_t		last_write;	/* The sector where the last write ended */
+	int			successive_reads;
+};
+
+/*
+ * 32 buffers of 2048 bytes
+ */
+#if (PAGE_SIZE % CD_FRAMESIZE) != 0
+#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE"
+#endif
+#define PACKET_MAX_SIZE		128
+#define FRAMES_PER_PAGE		(PAGE_SIZE / CD_FRAMESIZE)
+#define PACKET_MAX_SECTORS	(PACKET_MAX_SIZE * CD_FRAMESIZE >> 9)
+
+enum packet_data_state {
+	PACKET_IDLE_STATE,			/* Not used at the moment */
+	PACKET_WAITING_STATE,			/* Waiting for more bios to arrive, so */
+						/* we don't have to do as much */
+						/* data gathering */
+	PACKET_READ_WAIT_STATE,			/* Waiting for reads to fill in holes */
+	PACKET_WRITE_WAIT_STATE,		/* Waiting for the write to complete */
+	PACKET_RECOVERY_STATE,			/* Recover after read/write errors */
+	PACKET_FINISHED_STATE,			/* After write has finished */
+
+	PACKET_NUM_STATES			/* Number of possible states */
+};
+
+/*
+ * Information needed for writing a single packet
+ */
+struct pktcdvd_device;
+
+struct packet_data
+{
+	struct list_head	list;
+
+	spinlock_t		lock;		/* Lock protecting state transitions and */
+						/* orig_bios list */
+
+	struct bio_list		orig_bios;	/* Original bios passed to pkt_make_request */
+						/* that will be handled by this packet */
+	int			write_size;	/* Total size of all bios in the orig_bios */
+						/* list, measured in number of frames */
+
+	struct bio		*w_bio;		/* The bio we will send to the real CD */
+						/* device once we have all data for the */
+						/* packet we are going to write */
+	sector_t		sector;		/* First sector in this packet */
+	int			frames;		/* Number of frames in this packet */
+
+	enum packet_data_state	state;		/* Current state */
+	atomic_t		run_sm;		/* Incremented whenever the state */
+						/* machine needs to be run */
+	long			sleep_time;	/* Set this to non-zero to make the state */
+						/* machine run after this many jiffies. */
+
+	atomic_t		io_wait;	/* Number of pending IO operations */
+	atomic_t		io_errors;	/* Number of read/write errors during IO */
+
+	struct bio		*r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */
+	struct page		*pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE];
+
+	int			cache_valid;	/* If non-zero, the data for the zone defined */
+						/* by the sector variable is completely cached */
+						/* in the pages[] vector. */
+
+	int			id;		/* ID number for debugging */
+	struct pktcdvd_device	*pd;
+};
+
+struct pkt_rb_node {
+	struct rb_node		rb_node;
+	struct bio		*bio;
+};
+
+struct packet_stacked_data
+{
+	struct bio		*bio;		/* Original read request bio */
+	struct pktcdvd_device	*pd;
+};
+#define PSD_POOL_SIZE		64
+
+struct pktcdvd_device
+{
+	struct block_device	*bdev;		/* dev attached */
+	dev_t			pkt_dev;	/* our dev */
+	char			name[20];
+	struct packet_settings	settings;
+	struct packet_stats	stats;
+	int			refcnt;		/* Open count */
+	int			write_speed;	/* current write speed, kB/s */
+	int			read_speed;	/* current read speed, kB/s */
+	unsigned long		offset;		/* start offset */
+	__u8			mode_offset;	/* 0 / 8 */
+	__u8			type;
+	unsigned long		flags;
+	__u16			mmc3_profile;
+	__u32			nwa;		/* next writable address */
+	__u32			lra;		/* last recorded address */
+	struct packet_cdrw	cdrw;
+	wait_queue_head_t	wqueue;
+
+	spinlock_t		lock;		/* Serialize access to bio_queue */
+	struct rb_root		bio_queue;	/* Work queue of bios we need to handle */
+	int			bio_queue_size;	/* Number of nodes in bio_queue */
+	bool			congested;	/* Someone is waiting for bio_queue_size
+						 * to drop. */
+	sector_t		current_sector;	/* Keep track of where the elevator is */
+	atomic_t		scan_queue;	/* Set to non-zero when pkt_handle_queue */
+						/* needs to be run. */
+	mempool_t		rb_pool;	/* mempool for pkt_rb_node allocations */
+
+	struct packet_iosched   iosched;
+	struct gendisk		*disk;
+
+	int			write_congestion_off;
+	int			write_congestion_on;
+
+	struct device		*dev;		/* sysfs pktcdvd[0-7] dev */
+
+	struct dentry		*dfs_d_root;	/* debugfs: devname directory */
+	struct dentry		*dfs_f_info;	/* debugfs: info file */
+};
+
+#endif /* __PKTCDVD_H */
diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h
new file mode 100644
index 000000000000..9cbb55d21c94
--- /dev/null
+++ b/include/uapi/linux/pktcdvd.h
@@ -0,0 +1,112 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2000 Jens Axboe <axboe@suse.de>
+ * Copyright (C) 2001-2004 Peter Osterlund <petero2@telia.com>
+ *
+ * May be copied or modified under the terms of the GNU General Public
+ * License.  See linux/COPYING for more information.
+ *
+ * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and
+ * DVD-RW devices.
+ *
+ */
+#ifndef _UAPI__PKTCDVD_H
+#define _UAPI__PKTCDVD_H
+
+#include <linux/types.h>
+
+/*
+ * 1 for normal debug messages, 2 is very verbose. 0 to turn it off.
+ */
+#define PACKET_DEBUG		1
+
+#define	MAX_WRITERS		8
+
+#define PKT_RB_POOL_SIZE	512
+
+/*
+ * How long we should hold a non-full packet before starting data gathering.
+ */
+#define PACKET_WAIT_TIME	(HZ * 5 / 1000)
+
+/*
+ * use drive write caching -- we need deferred error handling to be
+ * able to successfully recover with this option (drive will return good
+ * status as soon as the cdb is validated).
+ */
+#if defined(CONFIG_CDROM_PKTCDVD_WCACHE)
+#define USE_WCACHING		1
+#else
+#define USE_WCACHING		0
+#endif
+
+/*
+ * No user-servicable parts beyond this point ->
+ */
+
+/*
+ * device types
+ */
+#define PACKET_CDR		1
+#define	PACKET_CDRW		2
+#define PACKET_DVDR		3
+#define PACKET_DVDRW		4
+
+/*
+ * flags
+ */
+#define PACKET_WRITABLE		1	/* pd is writable */
+#define PACKET_NWA_VALID	2	/* next writable address valid */
+#define PACKET_LRA_VALID	3	/* last recorded address valid */
+#define PACKET_MERGE_SEGS	4	/* perform segment merging to keep */
+					/* underlying cdrom device happy */
+
+/*
+ * Disc status -- from READ_DISC_INFO
+ */
+#define PACKET_DISC_EMPTY	0
+#define PACKET_DISC_INCOMPLETE	1
+#define PACKET_DISC_COMPLETE	2
+#define PACKET_DISC_OTHER	3
+
+/*
+ * write type, and corresponding data block type
+ */
+#define PACKET_MODE1		1
+#define PACKET_MODE2		2
+#define PACKET_BLOCK_MODE1	8
+#define PACKET_BLOCK_MODE2	10
+
+/*
+ * Last session/border status
+ */
+#define PACKET_SESSION_EMPTY		0
+#define PACKET_SESSION_INCOMPLETE	1
+#define PACKET_SESSION_RESERVED		2
+#define PACKET_SESSION_COMPLETE		3
+
+#define PACKET_MCN			"4a656e734178626f65323030300000"
+
+#undef PACKET_USE_LS
+
+#define PKT_CTRL_CMD_SETUP	0
+#define PKT_CTRL_CMD_TEARDOWN	1
+#define PKT_CTRL_CMD_STATUS	2
+
+struct pkt_ctrl_command {
+	__u32 command;				/* in: Setup, teardown, status */
+	__u32 dev_index;			/* in/out: Device index */
+	__u32 dev;				/* in/out: Device nr for cdrw device */
+	__u32 pkt_dev;				/* in/out: Device nr for packet device */
+	__u32 num_devices;			/* out: Largest device index + 1 */
+	__u32 padding;				/* Not used */
+};
+
+/*
+ * packet ioctls
+ */
+#define PACKET_IOCTL_MAGIC	('X')
+#define PACKET_CTRL_CMD		_IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command)
+
+
+#endif /* _UAPI__PKTCDVD_H */
-- 
cgit v1.3.1


From 19e183b54528f11fafeca60fc6d0821e29ff281e Mon Sep 17 00:00:00 2001
From: Catalin Marinas <catalin.marinas@arm.com>
Date: Thu, 22 Dec 2022 18:12:50 +0000
Subject: elfcore: Add a cprm parameter to elf_core_extra_{phdrs,data_size}

A subsequent fix for arm64 will use this parameter to parse the vma
information from the snapshot created by dump_vma_snapshot() rather than
traversing the vma list without the mmap_lock.

Fixes: 6dd8b1a0b6cb ("arm64: mte: Dump the MTE tags in the core file")
Cc: <stable@vger.kernel.org> # 5.18.x
Signed-off-by: Catalin Marinas <catalin.marinas@arm.com>
Reported-by: Seth Jenkins <sethjenkins@google.com>
Suggested-by: Seth Jenkins <sethjenkins@google.com>
Cc: Will Deacon <will@kernel.org>
Cc: Eric Biederman <ebiederm@xmission.com>
Cc: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20221222181251.1345752-3-catalin.marinas@arm.com
Signed-off-by: Will Deacon <will@kernel.org>
---
 arch/arm64/kernel/elfcore.c | 4 ++--
 arch/ia64/kernel/elfcore.c  | 4 ++--
 arch/x86/um/elfcore.c       | 4 ++--
 fs/binfmt_elf.c             | 4 ++--
 fs/binfmt_elf_fdpic.c       | 4 ++--
 include/linux/elfcore.h     | 8 ++++----
 6 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c
index 45f0aba1d875..f25519e283a5 100644
--- a/arch/arm64/kernel/elfcore.c
+++ b/arch/arm64/kernel/elfcore.c
@@ -76,7 +76,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm,
 	return ret;
 }
 
-Elf_Half elf_core_extra_phdrs(void)
+Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
 	int vma_count = 0;
@@ -113,7 +113,7 @@ int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	struct vm_area_struct *vma;
 	size_t data_size = 0;
diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c
index 94680521fbf9..8895df121540 100644
--- a/arch/ia64/kernel/elfcore.c
+++ b/arch/ia64/kernel/elfcore.c
@@ -7,7 +7,7 @@
 #include <asm/elf.h>
 
 
-Elf64_Half elf_core_extra_phdrs(void)
+Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return GATE_EHDR->e_phnum;
 }
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	const struct elf_phdr *const gate_phdrs =
 		(const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff);
diff --git a/arch/x86/um/elfcore.c b/arch/x86/um/elfcore.c
index 48a3eb09d951..650cdbbdaf45 100644
--- a/arch/x86/um/elfcore.c
+++ b/arch/x86/um/elfcore.c
@@ -7,7 +7,7 @@
 #include <asm/elf.h>
 
 
-Elf32_Half elf_core_extra_phdrs(void)
+Elf32_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return vsyscall_ehdr ? (((struct elfhdr *)vsyscall_ehdr)->e_phnum) : 0;
 }
@@ -60,7 +60,7 @@ int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-size_t elf_core_extra_data_size(void)
+size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	if ( vsyscall_ehdr ) {
 		const struct elfhdr *const ehdrp =
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index de63572a9404..9a780fafc539 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2034,7 +2034,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	 * The number of segs are recored into ELF header as 16bit value.
 	 * Please check DEFAULT_MAX_MAP_COUNT definition when you modify here.
 	 */
-	segs = cprm->vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
 
 	/* for notes section */
 	segs++;
@@ -2074,7 +2074,7 @@ static int elf_core_dump(struct coredump_params *cprm)
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	offset += cprm->vma_data_size;
-	offset += elf_core_extra_data_size();
+	offset += elf_core_extra_data_size(cprm);
 	e_shoff = offset;
 
 	if (e_phnum == PN_XNUM) {
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 096e3520a0b1..a05eafcacfb2 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1509,7 +1509,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	tmp->next = thread_list;
 	thread_list = tmp;
 
-	segs = cprm->vma_count + elf_core_extra_phdrs();
+	segs = cprm->vma_count + elf_core_extra_phdrs(cprm);
 
 	/* for notes section */
 	segs++;
@@ -1555,7 +1555,7 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
 	dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
 
 	offset += cprm->vma_data_size;
-	offset += elf_core_extra_data_size();
+	offset += elf_core_extra_data_size(cprm);
 	e_shoff = offset;
 
 	if (e_phnum == PN_XNUM) {
diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h
index 9ec81290e3c8..bd5560542c79 100644
--- a/include/linux/elfcore.h
+++ b/include/linux/elfcore.h
@@ -105,14 +105,14 @@ int elf_core_copy_task_fpregs(struct task_struct *t, elf_fpregset_t *fpu);
  * Dumping its extra ELF program headers includes all the other information
  * a debugger needs to easily find how the gate DSO was being used.
  */
-extern Elf_Half elf_core_extra_phdrs(void);
+extern Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm);
 extern int
 elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset);
 extern int
 elf_core_write_extra_data(struct coredump_params *cprm);
-extern size_t elf_core_extra_data_size(void);
+extern size_t elf_core_extra_data_size(struct coredump_params *cprm);
 #else
-static inline Elf_Half elf_core_extra_phdrs(void)
+static inline Elf_Half elf_core_extra_phdrs(struct coredump_params *cprm)
 {
 	return 0;
 }
@@ -127,7 +127,7 @@ static inline int elf_core_write_extra_data(struct coredump_params *cprm)
 	return 1;
 }
 
-static inline size_t elf_core_extra_data_size(void)
+static inline size_t elf_core_extra_data_size(struct coredump_params *cprm)
 {
 	return 0;
 }
-- 
cgit v1.3.1


From 5040011d073d3acdeb58af2b64f84e33bb03abd2 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 2 Nov 2022 10:24:29 +0000
Subject: rxrpc: Make the local endpoint hold a ref on a connected call

Make the local endpoint and it's I/O thread hold a reference on a connected
call until that call is disconnected.  Without this, we're reliant on
either the AF_RXRPC socket to hold a ref (which is dropped when the call is
released) or a queued work item to hold a ref (the work item is being
replaced with the I/O thread).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |  3 +++
 net/rxrpc/call_object.c      |  2 ++
 net/rxrpc/conn_client.c      |  6 +++---
 net/rxrpc/conn_object.c      | 25 +++++++++++++++----------
 4 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 5f9dd7389536..b526d982da7e 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -148,6 +148,7 @@
 	E_(rxrpc_client_to_idle,		"->Idle")
 
 #define rxrpc_call_traces \
+	EM(rxrpc_call_get_io_thread,		"GET iothread") \
 	EM(rxrpc_call_get_input,		"GET input   ") \
 	EM(rxrpc_call_get_kernel_service,	"GET krnl-srv") \
 	EM(rxrpc_call_get_notify_socket,	"GET notify  ") \
@@ -160,6 +161,7 @@
 	EM(rxrpc_call_new_prealloc_service,	"NEW prealloc") \
 	EM(rxrpc_call_put_discard_prealloc,	"PUT disc-pre") \
 	EM(rxrpc_call_put_discard_error,	"PUT disc-err") \
+	EM(rxrpc_call_put_io_thread,		"PUT iothread") \
 	EM(rxrpc_call_put_input,		"PUT input   ") \
 	EM(rxrpc_call_put_kernel,		"PUT kernel  ") \
 	EM(rxrpc_call_put_poke,			"PUT poke    ") \
@@ -173,6 +175,7 @@
 	EM(rxrpc_call_see_activate_client,	"SEE act-clnt") \
 	EM(rxrpc_call_see_connect_failed,	"SEE con-fail") \
 	EM(rxrpc_call_see_connected,		"SEE connect ") \
+	EM(rxrpc_call_see_disconnected,		"SEE disconn ") \
 	EM(rxrpc_call_see_distribute_error,	"SEE dist-err") \
 	EM(rxrpc_call_see_input,		"SEE input   ") \
 	EM(rxrpc_call_see_release,		"SEE release ") \
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 89dcf60b1158..239fc3c75079 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -453,6 +453,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 		BUG();
 	}
 
+	rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
 	/* Set the channel for this call.  We don't get channel_lock as we're
 	 * only defending against the data_ready handler (which we're called
 	 * from) and the RESPONSE packet parser (which is only really
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index e4063c4f4bb2..1edd65883c55 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -725,8 +725,11 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 
 	rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
 
+	rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
 	bundle = rxrpc_prep_call(rx, call, cp, srx, gfp);
 	if (IS_ERR(bundle)) {
+		rxrpc_put_call(call, rxrpc_call_get_io_thread);
 		ret = PTR_ERR(bundle);
 		goto out;
 	}
@@ -820,7 +823,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 	_enter("c=%x", call->debug_id);
 
 	spin_lock(&bundle->channel_lock);
-	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
 
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.
@@ -912,8 +914,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 
 out:
 	spin_unlock(&bundle->channel_lock);
-	_leave("");
-	return;
 }
 
 /*
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3c8f83dacb2b..2bd3f6288895 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -178,6 +178,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 {
 	struct rxrpc_connection *conn = call->conn;
 
+	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+	rxrpc_see_call(call, rxrpc_call_see_disconnected);
+
 	call->peer->cong_ssthresh = call->cong_ssthresh;
 
 	if (!hlist_unhashed(&call->error_link)) {
@@ -186,18 +189,20 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 		spin_unlock(&call->peer->lock);
 	}
 
-	if (rxrpc_is_client_call(call))
-		return rxrpc_disconnect_client_call(conn->bundle, call);
+	if (rxrpc_is_client_call(call)) {
+		rxrpc_disconnect_client_call(conn->bundle, call);
+	} else {
+		spin_lock(&conn->bundle->channel_lock);
+		__rxrpc_disconnect_call(conn, call);
+		spin_unlock(&conn->bundle->channel_lock);
 
-	spin_lock(&conn->bundle->channel_lock);
-	__rxrpc_disconnect_call(conn, call);
-	spin_unlock(&conn->bundle->channel_lock);
+		conn->idle_timestamp = jiffies;
+		if (atomic_dec_and_test(&conn->active))
+			rxrpc_set_service_reap_timer(conn->rxnet,
+						     jiffies + rxrpc_connection_expiry);
+	}
 
-	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
-	conn->idle_timestamp = jiffies;
-	if (atomic_dec_and_test(&conn->active))
-		rxrpc_set_service_reap_timer(conn->rxnet,
-					     jiffies + rxrpc_connection_expiry);
+	rxrpc_put_call(call, rxrpc_call_put_io_thread);
 }
 
 /*
-- 
cgit v1.3.1


From a343b174b4bdde851033996960bca5ad1394d04b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 12 Oct 2022 22:17:56 +0100
Subject: rxrpc: Only set/transmit aborts in the I/O thread

Only set the abort call completion state in the I/O thread and only
transmit ABORT packets from there.  rxrpc_abort_call() can then be made to
actually send the packet.

Further, ABORT packets should only be sent if the call has been exposed to
the network (ie. at least one attempted DATA transmission has occurred for
it).

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |  1 +
 net/rxrpc/ar-internal.h      |  7 ++++++-
 net/rxrpc/call_event.c       | 16 +++++++++++++---
 net/rxrpc/call_object.c      |  7 ++++---
 net/rxrpc/input.c            |  6 ++----
 net/rxrpc/recvmsg.c          |  2 ++
 net/rxrpc/sendmsg.c          | 29 ++++++++++++++++++++++-------
 7 files changed, 50 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index b526d982da7e..c44cc01de750 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -17,6 +17,7 @@
  * Declare tracing information enums and their string mappings for display.
  */
 #define rxrpc_call_poke_traces \
+	EM(rxrpc_call_poke_abort,		"Abort")	\
 	EM(rxrpc_call_poke_error,		"Error")	\
 	EM(rxrpc_call_poke_idle,		"Idle")		\
 	EM(rxrpc_call_poke_start,		"Start")	\
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index f3b8806e7241..0cf28a56aec5 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -625,7 +625,10 @@ struct rxrpc_call {
 	unsigned long		events;
 	spinlock_t		notify_lock;	/* Kernel notification lock */
 	rwlock_t		state_lock;	/* lock for state transition */
-	u32			abort_code;	/* Local/remote abort code */
+	const char		*send_abort_why; /* String indicating why the abort was sent */
+	s32			send_abort;	/* Abort code to be sent */
+	short			send_abort_err;	/* Error to be associated with the abort */
+	s32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
 	enum rxrpc_call_state	state;		/* current state of call */
 	enum rxrpc_call_completion completion;	/* Call completion condition */
@@ -1146,6 +1149,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
 /*
  * sendmsg.c
  */
+bool rxrpc_propose_abort(struct rxrpc_call *call,
+			 u32 abort_code, int error, const char *why);
 int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
 
 /*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b2cf448fb02c..b7efecf5ccfc 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -270,9 +270,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
 {
 	struct rxrpc_txbuf *txb;
 
-	if (rxrpc_is_client_call(call) &&
-	    !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+	if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+		if (list_empty(&call->tx_sendmsg))
+			return;
 		rxrpc_expose_client_call(call);
+	}
 
 	while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
 					       struct rxrpc_txbuf, call_link))) {
@@ -336,6 +338,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 	unsigned long now, next, t;
 	rxrpc_serial_t ackr_serial;
 	bool resend = false, expired = false;
+	s32 abort_code;
 
 	rxrpc_see_call(call, rxrpc_call_see_input);
 
@@ -346,6 +349,14 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 	if (call->state == RXRPC_CALL_COMPLETE)
 		goto out;
 
+	/* Handle abort request locklessly, vs rxrpc_propose_abort(). */
+	abort_code = smp_load_acquire(&call->send_abort);
+	if (abort_code) {
+		rxrpc_abort_call(call->send_abort_why, call, 0, call->send_abort,
+				 call->send_abort_err);
+		goto out;
+	}
+
 	if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
 		goto out;
 
@@ -433,7 +444,6 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 		} else {
 			rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
 		}
-		rxrpc_send_abort_packet(call);
 		goto out;
 	}
 
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 239fc3c75079..298b7c465d7e 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -430,6 +430,8 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 	call->state		= RXRPC_CALL_SERVER_SECURING;
 	call->cong_tstamp	= skb->tstamp;
 
+	__set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+
 	spin_lock(&conn->state_lock);
 
 	switch (conn->state) {
@@ -590,7 +592,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 		call = list_entry(rx->to_be_accepted.next,
 				  struct rxrpc_call, accept_link);
 		list_del(&call->accept_link);
-		rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
+		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKR");
 		rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
 	}
 
@@ -598,8 +600,7 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 		call = list_entry(rx->sock_calls.next,
 				  struct rxrpc_call, sock_link);
 		rxrpc_get_call(call, rxrpc_call_get_release_sock);
-		rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
-		rxrpc_send_abort_packet(call);
+		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKT");
 		rxrpc_release_call(rx, call);
 		rxrpc_put_call(call, rxrpc_call_put_release_sock);
 	}
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d0e20e946e48..1f03a286620d 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -12,8 +12,7 @@
 static void rxrpc_proto_abort(const char *why,
 			      struct rxrpc_call *call, rxrpc_seq_t seq)
 {
-	if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
-		rxrpc_send_abort_packet(call);
+	rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG);
 }
 
 /*
@@ -1007,8 +1006,7 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
 	case RXRPC_CALL_COMPLETE:
 		break;
 	default:
-		if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
-			rxrpc_send_abort_packet(call);
+		rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN);
 		trace_rxrpc_improper_term(call);
 		break;
 	}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 6ebd6440a2b7..a4ccdc006d0f 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -134,6 +134,8 @@ bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
 	write_lock(&call->state_lock);
 	ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
 	write_unlock(&call->state_lock);
+	if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+		rxrpc_send_abort_packet(call);
 	return ret;
 }
 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index cde1e65f16b4..dc3c2a834fc8 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -17,6 +17,26 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
+/*
+ * Propose an abort to be made in the I/O thread.
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call,
+			 u32 abort_code, int error, const char *why)
+{
+	_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+
+	if (!call->send_abort && call->state < RXRPC_CALL_COMPLETE) {
+		call->send_abort_why = why;
+		call->send_abort_err = error;
+		/* Request abort locklessly vs rxrpc_input_call_event(). */
+		smp_store_release(&call->send_abort, abort_code);
+		rxrpc_poke_call(call, rxrpc_call_poke_abort);
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Return true if there's sufficient Tx queue space.
  */
@@ -663,9 +683,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		/* it's too late for this call */
 		ret = -ESHUTDOWN;
 	} else if (p.command == RXRPC_CMD_SEND_ABORT) {
+		rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, "CMD");
 		ret = 0;
-		if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED))
-			ret = rxrpc_send_abort_packet(call);
 	} else if (p.command != RXRPC_CMD_SEND_DATA) {
 		ret = -EINVAL;
 	} else {
@@ -760,11 +779,7 @@ bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
 	_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
 
 	mutex_lock(&call->user_mutex);
-
-	aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
-	if (aborted)
-		rxrpc_send_abort_packet(call);
-
+	aborted = rxrpc_propose_abort(call, abort_code, error, why);
 	mutex_unlock(&call->user_mutex);
 	return aborted;
 }
-- 
cgit v1.3.1


From 03fc55adf8761c546d72798264b019c9f672c578 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 12 Oct 2022 17:01:25 +0100
Subject: rxrpc: Only disconnect calls in the I/O thread

Only perform call disconnection in the I/O thread to reduce the locking
requirement.

This is the first part of a fix for a race that exists between call
connection and call disconnection whereby the data transmission code adds
the call to the peer error distribution list after the call has been
disconnected (say by the rxrpc socket getting closed).

The fix is to complete the process of moving call connection, data
transmission and call disconnection into the I/O thread and thus forcibly
serialising them.

Note that the issue may predate the overhaul to an I/O thread model that
were included in the merge window for v6.2, but the timing is very much
changed by the change given below.

Fixes: cf37b5987508 ("rxrpc: Move DATA transmission into call processor work item")
Reported-by: syzbot+c22650d2844392afdcfd@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h | 1 +
 net/rxrpc/call_event.c       | 7 ++++++-
 net/rxrpc/call_object.c      | 9 +--------
 net/rxrpc/input.c            | 6 ------
 net/rxrpc/recvmsg.c          | 1 +
 5 files changed, 9 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index c44cc01de750..eac513668e33 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -18,6 +18,7 @@
  */
 #define rxrpc_call_poke_traces \
 	EM(rxrpc_call_poke_abort,		"Abort")	\
+	EM(rxrpc_call_poke_complete,		"Compl")	\
 	EM(rxrpc_call_poke_error,		"Error")	\
 	EM(rxrpc_call_poke_idle,		"Idle")		\
 	EM(rxrpc_call_poke_start,		"Start")	\
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b7efecf5ccfc..b2fc3fa686ec 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -484,8 +484,13 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 	}
 
 out:
-	if (call->state == RXRPC_CALL_COMPLETE)
+	if (call->state == RXRPC_CALL_COMPLETE) {
 		del_timer_sync(&call->timer);
+		if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+			rxrpc_disconnect_call(call);
+		if (call->security)
+			call->security->free_call_crypto(call);
+	}
 	if (call->acks_hard_ack != call->tx_bottom)
 		rxrpc_shrink_call_tx_buffer(call);
 	_leave("");
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 298b7c465d7e..13aac3ca03a0 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -50,7 +50,7 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
 	struct rxrpc_local *local = call->local;
 	bool busy;
 
-	if (call->state < RXRPC_CALL_COMPLETE) {
+	if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
 		spin_lock_bh(&local->lock);
 		busy = !list_empty(&call->attend_link);
 		trace_rxrpc_poke_call(call, busy, what);
@@ -533,13 +533,10 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
 			 call->flags, rxrpc_call_see_release);
 
-	ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
-
 	if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
 		BUG();
 
 	rxrpc_put_call_slot(call);
-	del_timer_sync(&call->timer);
 
 	/* Make sure we don't get any more notifications */
 	write_lock(&rx->recvmsg_lock);
@@ -572,10 +569,6 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
 	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
-	if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
-		rxrpc_disconnect_call(call);
-	if (call->security)
-		call->security->free_call_crypto(call);
 	_leave("");
 }
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 1f03a286620d..bb4beb445325 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -997,8 +997,6 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
  */
 void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
 {
-	struct rxrpc_connection *conn = call->conn;
-
 	switch (READ_ONCE(call->state)) {
 	case RXRPC_CALL_SERVER_AWAIT_ACK:
 		rxrpc_call_completed(call);
@@ -1012,8 +1010,4 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
 	}
 
 	rxrpc_input_call_event(call, skb);
-
-	spin_lock(&conn->bundle->channel_lock);
-	__rxrpc_disconnect_call(conn, call);
-	spin_unlock(&conn->bundle->channel_lock);
 }
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index a4ccdc006d0f..8d5fe65f5951 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -201,6 +201,7 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
 	case RXRPC_CALL_CLIENT_RECV_REPLY:
 		__rxrpc_call_completed(call);
 		write_unlock(&call->state_lock);
+		rxrpc_poke_call(call, rxrpc_call_poke_complete);
 		break;
 
 	case RXRPC_CALL_SERVER_RECV_REQUEST:
-- 
cgit v1.3.1


From f2cce89a074e6d2991dddc94f6b6ebe1576b8459 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Oct 2022 09:08:34 +0100
Subject: rxrpc: Implement a mechanism to send an event notification to a
 connection

Provide a means by which an event notification can be sent to a connection
through such that the I/O thread can pick it up and handle it rather than
doing it in a separate workqueue.

This is then used to move the deferred final ACK of a call into the I/O
thread rather than a separate work queue as part of the drive to do all
transmission from the I/O thread.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |  5 ++---
 net/rxrpc/ar-internal.h      |  5 +++++
 net/rxrpc/conn_event.c       | 14 ++++++++++----
 net/rxrpc/conn_object.c      | 20 +++++++++++++++++++-
 net/rxrpc/io_thread.c        | 19 ++++++++++++++++++-
 net/rxrpc/local_object.c     |  1 +
 6 files changed, 55 insertions(+), 9 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index eac513668e33..b969756f97fc 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -111,7 +111,7 @@
 	EM(rxrpc_conn_get_call_input,		"GET inp-call") \
 	EM(rxrpc_conn_get_conn_input,		"GET inp-conn") \
 	EM(rxrpc_conn_get_idle,			"GET idle    ") \
-	EM(rxrpc_conn_get_poke,			"GET poke    ") \
+	EM(rxrpc_conn_get_poke_timer,		"GET poke    ") \
 	EM(rxrpc_conn_get_service_conn,		"GET svc-conn") \
 	EM(rxrpc_conn_new_client,		"NEW client  ") \
 	EM(rxrpc_conn_new_service,		"NEW service ") \
@@ -126,10 +126,9 @@
 	EM(rxrpc_conn_put_service_reaped,	"PUT svc-reap") \
 	EM(rxrpc_conn_put_unbundle,		"PUT unbundle") \
 	EM(rxrpc_conn_put_unidle,		"PUT unidle  ") \
+	EM(rxrpc_conn_put_work,			"PUT work    ") \
 	EM(rxrpc_conn_queue_challenge,		"QUE chall   ") \
-	EM(rxrpc_conn_queue_retry_work,		"QUE retry-wk") \
 	EM(rxrpc_conn_queue_rx_work,		"QUE rx-work ") \
-	EM(rxrpc_conn_queue_timer,		"QUE timer   ") \
 	EM(rxrpc_conn_see_new_service_conn,	"SEE new-svc ") \
 	EM(rxrpc_conn_see_reap_service,		"SEE reap-svc") \
 	E_(rxrpc_conn_see_work,			"SEE work    ")
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 0cf28a56aec5..d82d7f36cdaa 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -202,6 +202,7 @@ struct rxrpc_host_header {
  * - max 48 bytes (struct sk_buff::cb)
  */
 struct rxrpc_skb_priv {
+	struct rxrpc_connection *conn;	/* Connection referred to (poke packet) */
 	u16		offset;		/* Offset of data */
 	u16		len;		/* Length of data */
 	u8		flags;
@@ -292,6 +293,7 @@ struct rxrpc_local {
 	struct rxrpc_sock __rcu	*service;	/* Service(s) listening on this endpoint */
 	struct rw_semaphore	defrag_sem;	/* control re-enablement of IP DF bit */
 	struct sk_buff_head	rx_queue;	/* Received packets */
+	struct list_head	conn_attend_q;	/* Conns requiring immediate attention */
 	struct list_head	call_attend_q;	/* Calls requiring immediate attention */
 	struct rb_root		client_bundles;	/* Client connection bundles by socket params */
 	spinlock_t		client_bundles_lock; /* Lock for client_bundles */
@@ -441,6 +443,7 @@ struct rxrpc_connection {
 	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct rxrpc_net	*rxnet;		/* Network namespace to which call belongs */
 	struct key		*key;		/* Security details */
+	struct list_head	attend_link;	/* Link in local->conn_attend_q */
 
 	refcount_t		ref;
 	atomic_t		active;		/* Active count for service conns */
@@ -905,6 +908,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *s
 void rxrpc_process_connection(struct work_struct *);
 void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
 int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
 
 /*
  * conn_object.c
@@ -912,6 +916,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
 extern unsigned int rxrpc_connection_expiry;
 extern unsigned int rxrpc_closed_conn_expiry;
 
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why);
 struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
 struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
 							  struct sockaddr_rxrpc *,
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index dfd29882126f..7a980a32344f 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -412,10 +412,6 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
 	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
 		rxrpc_secure_connection(conn);
 
-	/* Process delayed ACKs whose time has come. */
-	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
-		rxrpc_process_delayed_final_acks(conn, false);
-
 	/* go through the conn-level event packets, releasing the ref on this
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
@@ -515,3 +511,13 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
 		return -EPROTO;
 	}
 }
+
+/*
+ * Input a connection event.
+ */
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+	/* Process delayed ACKs whose time has come. */
+	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+		rxrpc_process_delayed_final_acks(conn, false);
+}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 2bd3f6288895..281f59e356f5 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work);
 static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
 					 unsigned long reap_at);
 
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
+	struct rxrpc_local *local = conn->local;
+	bool busy;
+
+	if (WARN_ON_ONCE(!local))
+		return;
+
+	spin_lock_bh(&local->lock);
+	busy = !list_empty(&conn->attend_link);
+	if (!busy) {
+		rxrpc_get_connection(conn, why);
+		list_add_tail(&conn->attend_link, &local->conn_attend_q);
+	}
+	spin_unlock_bh(&local->lock);
+	rxrpc_wake_up_io_thread(local);
+}
+
 static void rxrpc_connection_timer(struct timer_list *timer)
 {
 	struct rxrpc_connection *conn =
 		container_of(timer, struct rxrpc_connection, timer);
 
-	rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
+	rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer);
 }
 
 /*
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 0e1a548d35f8..46e58cf5bc96 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -421,6 +421,7 @@ reject_packet:
  */
 int rxrpc_io_thread(void *data)
 {
+	struct rxrpc_connection *conn;
 	struct sk_buff_head rx_queue;
 	struct rxrpc_local *local = data;
 	struct rxrpc_call *call;
@@ -436,6 +437,20 @@ int rxrpc_io_thread(void *data)
 	for (;;) {
 		rxrpc_inc_stat(local->rxnet, stat_io_loop);
 
+		/* Deal with connections that want immediate attention. */
+		conn = list_first_entry_or_null(&local->conn_attend_q,
+						struct rxrpc_connection,
+						attend_link);
+		if (conn) {
+			spin_lock_bh(&local->lock);
+			list_del_init(&conn->attend_link);
+			spin_unlock_bh(&local->lock);
+
+			rxrpc_input_conn_event(conn, NULL);
+			rxrpc_put_connection(conn, rxrpc_conn_put_poke);
+			continue;
+		}
+
 		/* Deal with calls that want immediate attention. */
 		if ((call = list_first_entry_or_null(&local->call_attend_q,
 						     struct rxrpc_call,
@@ -463,6 +478,7 @@ int rxrpc_io_thread(void *data)
 				rxrpc_input_error(local, skb);
 				rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
 				break;
+				break;
 			default:
 				WARN_ON_ONCE(1);
 				rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
@@ -481,7 +497,8 @@ int rxrpc_io_thread(void *data)
 		set_current_state(TASK_INTERRUPTIBLE);
 		should_stop = kthread_should_stop();
 		if (!skb_queue_empty(&local->rx_queue) ||
-		    !list_empty(&local->call_attend_q)) {
+		    !list_empty(&local->call_attend_q) ||
+		    !list_empty(&local->conn_attend_q)) {
 			__set_current_state(TASK_RUNNING);
 			continue;
 		}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index c0ac2fe07ec4..8ef6cd8defa4 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -100,6 +100,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
 		init_rwsem(&local->defrag_sem);
 		init_completion(&local->io_thread_ready);
 		skb_queue_head_init(&local->rx_queue);
+		INIT_LIST_HEAD(&local->conn_attend_q);
 		INIT_LIST_HEAD(&local->call_attend_q);
 		local->client_bundles = RB_ROOT;
 		spin_lock_init(&local->client_bundles_lock);
-- 
cgit v1.3.1


From a00ce28b1778fa3576575b43bdb17f60ded38b66 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 20 Oct 2022 09:56:36 +0100
Subject: rxrpc: Clean up connection abort

Clean up connection abort, using the connection state_lock to gate access
to change that state, and use an rxrpc_call_completion value to indicate
the difference between local and remote aborts as these can be pasted
directly into the call state.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |   2 +
 net/rxrpc/ar-internal.h      |  46 +++++----
 net/rxrpc/call_object.c      |   8 +-
 net/rxrpc/conn_event.c       | 233 +++++++++++++++----------------------------
 net/rxrpc/insecure.c         |  18 +---
 net/rxrpc/output.c           |  56 +++++++++++
 net/rxrpc/proc.c             |  10 +-
 net/rxrpc/rxkad.c            |  28 ++----
 8 files changed, 188 insertions(+), 213 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index b969756f97fc..222d0498d23f 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -111,6 +111,7 @@
 	EM(rxrpc_conn_get_call_input,		"GET inp-call") \
 	EM(rxrpc_conn_get_conn_input,		"GET inp-conn") \
 	EM(rxrpc_conn_get_idle,			"GET idle    ") \
+	EM(rxrpc_conn_get_poke_abort,		"GET pk-abort") \
 	EM(rxrpc_conn_get_poke_timer,		"GET poke    ") \
 	EM(rxrpc_conn_get_service_conn,		"GET svc-conn") \
 	EM(rxrpc_conn_new_client,		"NEW client  ") \
@@ -128,6 +129,7 @@
 	EM(rxrpc_conn_put_unidle,		"PUT unidle  ") \
 	EM(rxrpc_conn_put_work,			"PUT work    ") \
 	EM(rxrpc_conn_queue_challenge,		"QUE chall   ") \
+	EM(rxrpc_conn_queue_retry_work,		"QUE retry-wk") \
 	EM(rxrpc_conn_queue_rx_work,		"QUE rx-work ") \
 	EM(rxrpc_conn_see_new_service_conn,	"SEE new-svc ") \
 	EM(rxrpc_conn_see_reap_service,		"SEE reap-svc") \
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index d82d7f36cdaa..78bd6fb0bc15 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -263,13 +263,11 @@ struct rxrpc_security {
 
 	/* respond to a challenge */
 	int (*respond_to_challenge)(struct rxrpc_connection *,
-				    struct sk_buff *,
-				    u32 *);
+				    struct sk_buff *);
 
 	/* verify a response */
 	int (*verify_response)(struct rxrpc_connection *,
-			       struct sk_buff *,
-			       u32 *);
+			       struct sk_buff *);
 
 	/* clear connection security */
 	void (*clear)(struct rxrpc_connection *);
@@ -367,6 +365,18 @@ struct rxrpc_conn_parameters {
 	u32			security_level;	/* Security level selected */
 };
 
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
+	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
+	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
+	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
+	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
+	NR__RXRPC_CALL_COMPLETIONS
+};
+
 /*
  * Bits in the connection flags.
  */
@@ -391,6 +401,7 @@ enum rxrpc_conn_flag {
  */
 enum rxrpc_conn_event {
 	RXRPC_CONN_EV_CHALLENGE,	/* Send challenge packet */
+	RXRPC_CONN_EV_ABORT_CALLS,	/* Abort attached calls */
 };
 
 /*
@@ -403,8 +414,7 @@ enum rxrpc_conn_proto_state {
 	RXRPC_CONN_SERVICE_UNSECURED,	/* Service unsecured connection */
 	RXRPC_CONN_SERVICE_CHALLENGING,	/* Service challenging for security */
 	RXRPC_CONN_SERVICE,		/* Service secured connection */
-	RXRPC_CONN_REMOTELY_ABORTED,	/* Conn aborted by peer */
-	RXRPC_CONN_LOCALLY_ABORTED,	/* Conn aborted locally */
+	RXRPC_CONN_ABORTED,		/* Conn aborted */
 	RXRPC_CONN__NR_STATES
 };
 
@@ -487,7 +497,8 @@ struct rxrpc_connection {
 	unsigned long		idle_timestamp;	/* Time at which last became idle */
 	spinlock_t		state_lock;	/* state-change lock */
 	enum rxrpc_conn_proto_state state;	/* current state of connection */
-	u32			abort_code;	/* Abort code of connection abort */
+	enum rxrpc_call_completion completion;	/* Completion condition */
+	s32			abort_code;	/* Abort code of connection abort */
 	int			debug_id;	/* debug ID for printks */
 	atomic_t		serial;		/* packet serial number counter */
 	unsigned int		hi_serial;	/* highest serial number received */
@@ -561,18 +572,6 @@ enum rxrpc_call_state {
 	NR__RXRPC_CALL_STATES
 };
 
-/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
-	RXRPC_CALL_SUCCEEDED,		/* - Normal termination */
-	RXRPC_CALL_REMOTELY_ABORTED,	/* - call aborted by peer */
-	RXRPC_CALL_LOCALLY_ABORTED,	/* - call aborted locally on error or close */
-	RXRPC_CALL_LOCAL_ERROR,		/* - call failed due to local error */
-	RXRPC_CALL_NETWORK_ERROR,	/* - call terminated by network error */
-	NR__RXRPC_CALL_COMPLETIONS
-};
-
 /*
  * Call Tx congestion management modes.
  */
@@ -905,11 +904,19 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *);
  */
 void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
 				unsigned int channel);
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+		     s32 abort_code, int err, const char *why);
 void rxrpc_process_connection(struct work_struct *);
 void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
 int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
 void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
 
+static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
+{
+	/* Order reading the abort info after the state check. */
+	return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED;
+}
+
 /*
  * conn_object.c
  */
@@ -1059,6 +1066,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
 int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
 int rxrpc_send_abort_packet(struct rxrpc_call *);
 int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
 void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
 void rxrpc_send_keepalive(struct rxrpc_peer *);
 void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 13aac3ca03a0..666430182dfd 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -443,14 +443,10 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 		call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
 		break;
 
-	case RXRPC_CONN_REMOTELY_ABORTED:
-		__rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+	case RXRPC_CONN_ABORTED:
+		__rxrpc_set_call_completion(call, conn->completion,
 					    conn->abort_code, conn->error);
 		break;
-	case RXRPC_CONN_LOCALLY_ABORTED:
-		__rxrpc_abort_call("CON", call, 1,
-				   conn->abort_code, conn->error);
-		break;
 	default:
 		BUG();
 	}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 7a980a32344f..753d91a9646f 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -16,6 +16,60 @@
 #include <net/ip.h>
 #include "ar-internal.h"
 
+/*
+ * Set the completion state on an aborted connection.
+ */
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+				   s32 abort_code, int err,
+				   enum rxrpc_call_completion compl)
+{
+	bool aborted = false;
+
+	if (conn->state != RXRPC_CONN_ABORTED) {
+		spin_lock(&conn->state_lock);
+		if (conn->state != RXRPC_CONN_ABORTED) {
+			conn->abort_code = abort_code;
+			conn->error	 = err;
+			conn->completion = compl;
+			/* Order the abort info before the state change. */
+			smp_store_release(&conn->state, RXRPC_CONN_ABORTED);
+			set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+			set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
+			aborted = true;
+		}
+		spin_unlock(&conn->state_lock);
+	}
+
+	return aborted;
+}
+
+/*
+ * Mark a socket buffer to indicate that the connection it's on should be aborted.
+ */
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+		     s32 abort_code, int err, const char *why)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+				   RXRPC_CALL_LOCALLY_ABORTED)) {
+		trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
+				  sp->hdr.seq, abort_code, err);
+		rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
+	}
+	return -EPROTO;
+}
+
+/*
+ * Mark a connection as being remotely aborted.
+ */
+static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+				   struct sk_buff *skb)
+{
+	return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+				      RXRPC_CALL_REMOTELY_ABORTED);
+}
+
 /*
  * Retransmit terminal ACK or ABORT of the previous call.
  */
@@ -146,9 +200,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 /*
  * pass a connection-level abort onto all calls on that connection
  */
-static void rxrpc_abort_calls(struct rxrpc_connection *conn,
-			      enum rxrpc_call_completion compl,
-			      rxrpc_serial_t serial)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn)
 {
 	struct rxrpc_call *call;
 	int i;
@@ -161,102 +213,17 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn,
 		call = rcu_dereference_protected(
 			conn->channels[i].call,
 			lockdep_is_held(&conn->bundle->channel_lock));
-		if (call) {
-			if (compl == RXRPC_CALL_LOCALLY_ABORTED)
-				trace_rxrpc_abort(call->debug_id,
-						  "CON", call->cid,
-						  call->call_id, 0,
-						  conn->abort_code,
-						  conn->error);
-			else
-				trace_rxrpc_rx_abort(call, serial,
-						     conn->abort_code);
-			rxrpc_set_call_completion(call, compl,
+		if (call)
+			rxrpc_set_call_completion(call,
+						  conn->completion,
 						  conn->abort_code,
 						  conn->error);
-		}
 	}
 
 	spin_unlock(&conn->bundle->channel_lock);
 	_leave("");
 }
 
-/*
- * generate a connection-level abort
- */
-static int rxrpc_abort_connection(struct rxrpc_connection *conn,
-				  int error, u32 abort_code)
-{
-	struct rxrpc_wire_header whdr;
-	struct msghdr msg;
-	struct kvec iov[2];
-	__be32 word;
-	size_t len;
-	u32 serial;
-	int ret;
-
-	_enter("%d,,%u,%u", conn->debug_id, error, abort_code);
-
-	/* generate a connection-level abort */
-	spin_lock(&conn->state_lock);
-	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
-		spin_unlock(&conn->state_lock);
-		_leave(" = 0 [already dead]");
-		return 0;
-	}
-
-	conn->error = error;
-	conn->abort_code = abort_code;
-	conn->state = RXRPC_CONN_LOCALLY_ABORTED;
-	set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
-	spin_unlock(&conn->state_lock);
-
-	msg.msg_name	= &conn->peer->srx.transport;
-	msg.msg_namelen	= conn->peer->srx.transport_len;
-	msg.msg_control	= NULL;
-	msg.msg_controllen = 0;
-	msg.msg_flags	= 0;
-
-	whdr.epoch	= htonl(conn->proto.epoch);
-	whdr.cid	= htonl(conn->proto.cid);
-	whdr.callNumber	= 0;
-	whdr.seq	= 0;
-	whdr.type	= RXRPC_PACKET_TYPE_ABORT;
-	whdr.flags	= conn->out_clientflag;
-	whdr.userStatus	= 0;
-	whdr.securityIndex = conn->security_ix;
-	whdr._rsvd	= 0;
-	whdr.serviceId	= htons(conn->service_id);
-
-	word		= htonl(conn->abort_code);
-
-	iov[0].iov_base	= &whdr;
-	iov[0].iov_len	= sizeof(whdr);
-	iov[1].iov_base	= &word;
-	iov[1].iov_len	= sizeof(word);
-
-	len = iov[0].iov_len + iov[1].iov_len;
-
-	serial = atomic_inc_return(&conn->serial);
-	rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
-	whdr.serial = htonl(serial);
-
-	ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
-	if (ret < 0) {
-		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
-				    rxrpc_tx_point_conn_abort);
-		_debug("sendmsg failed: %d", ret);
-		return -EAGAIN;
-	}
-
-	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
-
-	conn->peer->last_tx_at = ktime_get_seconds();
-
-	_leave(" = 0");
-	return 0;
-}
-
 /*
  * mark a call as being on a now-secured channel
  * - must be called with BH's disabled.
@@ -278,26 +245,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
  * connection-level Rx packet processor
  */
 static int rxrpc_process_event(struct rxrpc_connection *conn,
-			       struct sk_buff *skb,
-			       u32 *_abort_code)
+			       struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	int loop, ret;
 
-	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
-		_leave(" = -ECONNABORTED [%u]", conn->state);
+	if (conn->state == RXRPC_CONN_ABORTED)
 		return -ECONNABORTED;
-	}
 
 	_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
 
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_CHALLENGE:
-		return conn->security->respond_to_challenge(conn, skb,
-							    _abort_code);
+		return conn->security->respond_to_challenge(conn, skb);
 
 	case RXRPC_PACKET_TYPE_RESPONSE:
-		ret = conn->security->verify_response(conn, skb, _abort_code);
+		ret = conn->security->verify_response(conn, skb);
 		if (ret < 0)
 			return ret;
 
@@ -336,26 +299,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
  */
 static void rxrpc_secure_connection(struct rxrpc_connection *conn)
 {
-	u32 abort_code;
-	int ret;
-
-	_enter("{%d}", conn->debug_id);
-
-	ASSERT(conn->security_ix != 0);
-
-	if (conn->security->issue_challenge(conn) < 0) {
-		abort_code = RX_CALL_DEAD;
-		ret = -ENOMEM;
-		goto abort;
-	}
-
-	_leave("");
-	return;
-
-abort:
-	_debug("abort %d, %d", ret, abort_code);
-	rxrpc_abort_connection(conn, ret, abort_code);
-	_leave(" [aborted]");
+	if (conn->security->issue_challenge(conn) < 0)
+		rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, "OOM");
 }
 
 /*
@@ -406,7 +351,6 @@ again:
 static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
 {
 	struct sk_buff *skb;
-	u32 abort_code = RX_PROTOCOL_ERROR;
 	int ret;
 
 	if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
@@ -416,33 +360,18 @@ static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
 	 * connection that each one has when we've finished with it */
 	while ((skb = skb_dequeue(&conn->rx_queue))) {
 		rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
-		ret = rxrpc_process_event(conn, skb, &abort_code);
+		ret = rxrpc_process_event(conn, skb);
 		switch (ret) {
-		case -EPROTO:
-		case -EKEYEXPIRED:
-		case -EKEYREJECTED:
-			goto protocol_error;
 		case -ENOMEM:
 		case -EAGAIN:
-			goto requeue_and_leave;
-		case -ECONNABORTED:
+			skb_queue_head(&conn->rx_queue, skb);
+			rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work);
+			break;
 		default:
 			rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
 			break;
 		}
 	}
-
-	return;
-
-requeue_and_leave:
-	skb_queue_head(&conn->rx_queue, skb);
-	return;
-
-protocol_error:
-	if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
-		goto requeue_and_leave;
-	rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
-	return;
 }
 
 void rxrpc_process_connection(struct work_struct *work)
@@ -480,28 +409,25 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-	if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
-		_leave(" = -ECONNABORTED [%u]", conn->state);
-		return 0;
-	}
-
-	_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
-
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_BUSY:
 		/* Just ignore BUSY packets for now. */
 		return 0;
 
 	case RXRPC_PACKET_TYPE_ABORT:
-		conn->error = -ECONNABORTED;
-		conn->abort_code = skb->priority;
-		conn->state = RXRPC_CONN_REMOTELY_ABORTED;
-		set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
-		rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
-		return 0;
+		if (rxrpc_is_conn_aborted(conn))
+			return true;
+		rxrpc_input_conn_abort(conn, skb);
+		rxrpc_abort_calls(conn);
+		return true;
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
 	case RXRPC_PACKET_TYPE_RESPONSE:
+		if (rxrpc_is_conn_aborted(conn)) {
+			if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+				rxrpc_send_conn_abort(conn);
+			return true;
+		}
 		rxrpc_post_packet_to_conn(conn, skb);
 		return 0;
 
@@ -517,6 +443,9 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
  */
 void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
 {
+	if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+		rxrpc_abort_calls(conn);
+
 	/* Process delayed ACKs whose time has come. */
 	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
 		rxrpc_process_delayed_final_acks(conn, false);
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0eb8471bfc53..29dcc7d3f51a 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -43,25 +43,15 @@ static void none_free_call_crypto(struct rxrpc_call *call)
 }
 
 static int none_respond_to_challenge(struct rxrpc_connection *conn,
-				     struct sk_buff *skb,
-				     u32 *_abort_code)
+				     struct sk_buff *skb)
 {
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
-			      tracepoint_string("chall_none"));
-	return -EPROTO;
+	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN");
 }
 
 static int none_verify_response(struct rxrpc_connection *conn,
-				struct sk_buff *skb,
-				u32 *_abort_code)
+				struct sk_buff *skb)
 {
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
-			      tracepoint_string("resp_none"));
-	return -EPROTO;
+	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN");
 }
 
 static void none_clear(struct rxrpc_connection *conn)
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 3d8c9f830ee0..8a5ff2c9e061 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -544,6 +544,62 @@ send_fragmentable:
 	goto done;
 }
 
+/*
+ * Transmit a connection-level abort.
+ */
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
+{
+	struct rxrpc_wire_header whdr;
+	struct msghdr msg;
+	struct kvec iov[2];
+	__be32 word;
+	size_t len;
+	u32 serial;
+	int ret;
+
+	msg.msg_name	= &conn->peer->srx.transport;
+	msg.msg_namelen	= conn->peer->srx.transport_len;
+	msg.msg_control	= NULL;
+	msg.msg_controllen = 0;
+	msg.msg_flags	= 0;
+
+	whdr.epoch	= htonl(conn->proto.epoch);
+	whdr.cid	= htonl(conn->proto.cid);
+	whdr.callNumber	= 0;
+	whdr.seq	= 0;
+	whdr.type	= RXRPC_PACKET_TYPE_ABORT;
+	whdr.flags	= conn->out_clientflag;
+	whdr.userStatus	= 0;
+	whdr.securityIndex = conn->security_ix;
+	whdr._rsvd	= 0;
+	whdr.serviceId	= htons(conn->service_id);
+
+	word		= htonl(conn->abort_code);
+
+	iov[0].iov_base	= &whdr;
+	iov[0].iov_len	= sizeof(whdr);
+	iov[1].iov_base	= &word;
+	iov[1].iov_len	= sizeof(word);
+
+	len = iov[0].iov_len + iov[1].iov_len;
+
+	serial = atomic_inc_return(&conn->serial);
+	whdr.serial = htonl(serial);
+
+	iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+	ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+	if (ret < 0) {
+		trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+				    rxrpc_tx_point_conn_abort);
+		_debug("sendmsg failed: %d", ret);
+		return;
+	}
+
+	trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
+	conn->peer->last_tx_at = ktime_get_seconds();
+}
+
 /*
  * Reject a packet through the local endpoint.
  */
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 3a59591ec061..63947cce4048 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -17,8 +17,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
 	[RXRPC_CONN_SERVICE_UNSECURED]		= "SvUnsec ",
 	[RXRPC_CONN_SERVICE_CHALLENGING]	= "SvChall ",
 	[RXRPC_CONN_SERVICE]			= "SvSecure",
-	[RXRPC_CONN_REMOTELY_ABORTED]		= "RmtAbort",
-	[RXRPC_CONN_LOCALLY_ABORTED]		= "LocAbort",
+	[RXRPC_CONN_ABORTED]			= "Aborted ",
 };
 
 /*
@@ -143,6 +142,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 {
 	struct rxrpc_connection *conn;
 	struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+	const char *state;
 	char lbuff[50], rbuff[50];
 
 	if (v == &rxnet->conn_proc_list) {
@@ -163,9 +163,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
 	}
 
 	sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
-
 	sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
 print:
+	state = rxrpc_is_conn_aborted(conn) ?
+		rxrpc_call_completions[conn->completion] :
+		rxrpc_conn_states[conn->state];
 	seq_printf(seq,
 		   "UDP   %-47.47s %-47.47s %4x %08x %s %3u %3d"
 		   " %s %08x %08x %08x %08x %08x %08x %08x\n",
@@ -176,7 +178,7 @@ print:
 		   rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
 		   refcount_read(&conn->ref),
 		   atomic_read(&conn->active),
-		   rxrpc_conn_states[conn->state],
+		   state,
 		   key_serial(conn->key),
 		   atomic_read(&conn->serial),
 		   conn->hi_serial,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d1233720e05f..5d2fbc6ec3cf 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -821,8 +821,7 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn,
  * respond to a challenge packet
  */
 static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
-				      struct sk_buff *skb,
-				      u32 *_abort_code)
+				      struct sk_buff *skb)
 {
 	const struct rxrpc_key_token *token;
 	struct rxkad_challenge challenge;
@@ -898,7 +897,7 @@ protocol_error:
 	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
 	ret = -EPROTO;
 other_error:
-	*_abort_code = abort_code;
+	rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK");
 	return ret;
 }
 
@@ -910,8 +909,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 				struct sk_buff *skb,
 				void *ticket, size_t ticket_len,
 				struct rxrpc_crypt *_session_key,
-				time64_t *_expiry,
-				u32 *_abort_code)
+				time64_t *_expiry)
 {
 	struct skcipher_request *req;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -1042,8 +1040,7 @@ bad_ticket:
 	abort_code = RXKADBADTICKET;
 	ret = -EPROTO;
 other_error:
-	*_abort_code = abort_code;
-	return ret;
+	return rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK");
 temporary_error:
 	return ret;
 }
@@ -1086,8 +1083,7 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
  * verify a response
  */
 static int rxkad_verify_response(struct rxrpc_connection *conn,
-				 struct sk_buff *skb,
-				 u32 *_abort_code)
+				 struct sk_buff *skb)
 {
 	struct rxkad_response *response;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -1115,11 +1111,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 			abort_code = RXKADNOAUTH;
 			break;
 		}
-		trace_rxrpc_abort(0, "SVK",
-				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-				  abort_code, PTR_ERR(server_key));
-		*_abort_code = abort_code;
-		return -EPROTO;
+		return rxrpc_abort_conn(conn, skb, abort_code,
+					PTR_ERR(server_key), "RXK");
 	}
 
 	ret = -ENOMEM;
@@ -1168,7 +1161,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 		goto temporary_error_free_ticket;
 
 	ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
-				   &session_key, &expiry, _abort_code);
+				   &session_key, &expiry);
 	if (ret < 0)
 		goto temporary_error_free_ticket;
 
@@ -1246,10 +1239,9 @@ protocol_error_free:
 	kfree(ticket);
 protocol_error:
 	kfree(response);
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
 	key_put(server_key);
-	*_abort_code = abort_code;
-	return -EPROTO;
+	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
+	return rxrpc_abort_conn(conn, skb, abort_code, -EPROTO, "RXK");
 
 temporary_error_free_ticket:
 	kfree(ticket);
-- 
cgit v1.3.1


From 57af281e5389b6fefedb3685f86847cbb0055f75 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 6 Oct 2022 21:45:42 +0100
Subject: rxrpc: Tidy up abort generation infrastructure

Tidy up the abort generation infrastructure in the following ways:

 (1) Create an enum and string mapping table to list the reasons an abort
     might be generated in tracing.

 (2) Replace the 3-char string with the values from (1) in the places that
     use that to log the abort source.  This gets rid of a memcpy() in the
     tracepoint.

 (3) Subsume the rxrpc_rx_eproto tracepoint with the rxrpc_abort tracepoint
     and use values from (1) to indicate the trace reason.

 (4) Always make a call to an abort function at the point of the abort
     rather than stashing the values into variables and using goto to get
     to a place where it reported.  The C optimiser will collapse the calls
     together as appropriate.  The abort functions return a value that can
     be returned directly if appropriate.

Note that this extends into afs also at the points where that generates an
abort.  To aid with this, the afs sources need to #define
RXRPC_TRACE_ONLY_DEFINE_ENUMS before including the rxrpc tracing header
because they don't have access to the rxrpc internal structures that some
of the tracepoints make use of.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 fs/afs/cmservice.c           |   6 +-
 fs/afs/rxrpc.c               |  23 +++-
 include/net/af_rxrpc.h       |   3 +-
 include/trace/events/rxrpc.h | 140 +++++++++++++++----
 net/rxrpc/ar-internal.h      |  51 ++++---
 net/rxrpc/call_accept.c      |  43 +++---
 net/rxrpc/call_event.c       |  13 +-
 net/rxrpc/call_object.c      |   6 +-
 net/rxrpc/conn_event.c       |  19 ++-
 net/rxrpc/input.c            |  65 ++++-----
 net/rxrpc/insecure.c         |   6 +-
 net/rxrpc/io_thread.c        | 163 ++++++++++------------
 net/rxrpc/recvmsg.c          |  18 ++-
 net/rxrpc/rxkad.c            | 321 ++++++++++++++++++-------------------------
 net/rxrpc/rxperf.c           |  17 ++-
 net/rxrpc/security.c         |  14 +-
 net/rxrpc/sendmsg.c          |  20 +--
 17 files changed, 484 insertions(+), 444 deletions(-)

(limited to 'include')

diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 7dcd59693a0c..d4ddb20d6732 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -13,6 +13,8 @@
 #include "internal.h"
 #include "afs_cm.h"
 #include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
 
 static int afs_deliver_cb_init_call_back_state(struct afs_call *);
 static int afs_deliver_cb_init_call_back_state3(struct afs_call *);
@@ -191,7 +193,7 @@ static void afs_cm_destructor(struct afs_call *call)
  * Abort a service call from within an action function.
  */
 static void afs_abort_service_call(struct afs_call *call, u32 abort_code, int error,
-				   const char *why)
+				   enum rxrpc_abort_reason why)
 {
 	rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
 				abort_code, error, why);
@@ -469,7 +471,7 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
 	if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
 		afs_send_empty_reply(call);
 	else
-		afs_abort_service_call(call, 1, 1, "K-1");
+		afs_abort_service_call(call, 1, 1, afs_abort_probeuuid_negative);
 
 	afs_put_call(call);
 	_leave("");
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index c62939e5ea1f..bd3830bc6700 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -13,6 +13,8 @@
 #include "internal.h"
 #include "afs_cm.h"
 #include "protocol_yfs.h"
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
 
 struct workqueue_struct *afs_async_calls;
 
@@ -397,7 +399,8 @@ void afs_make_call(struct afs_addr_cursor *ac, struct afs_call *call, gfp_t gfp)
 error_do_abort:
 	if (ret != -ECONNABORTED) {
 		rxrpc_kernel_abort_call(call->net->socket, rxcall,
-					RX_USER_ABORT, ret, "KSD");
+					RX_USER_ABORT, ret,
+					afs_abort_send_data_error);
 	} else {
 		len = 0;
 		iov_iter_kvec(&msg.msg_iter, ITER_DEST, NULL, 0, 0);
@@ -527,7 +530,8 @@ static void afs_deliver_to_call(struct afs_call *call)
 		case -ENOTSUPP:
 			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						abort_code, ret, "KIV");
+						abort_code, ret,
+						afs_abort_op_not_supported);
 			goto local_abort;
 		case -EIO:
 			pr_err("kAFS: Call %u in bad state %u\n",
@@ -542,12 +546,14 @@ static void afs_deliver_to_call(struct afs_call *call)
 			if (state != AFS_CALL_CL_AWAIT_REPLY)
 				abort_code = RXGEN_SS_UNMARSHAL;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						abort_code, ret, "KUM");
+						abort_code, ret,
+						afs_abort_unmarshal_error);
 			goto local_abort;
 		default:
 			abort_code = RX_CALL_DEAD;
 			rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						abort_code, ret, "KER");
+						abort_code, ret,
+						afs_abort_general_error);
 			goto local_abort;
 		}
 	}
@@ -619,7 +625,8 @@ long afs_wait_for_call_to_complete(struct afs_call *call,
 			/* Kill off the call if it's still live. */
 			_debug("call interrupted");
 			if (rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
-						    RX_USER_ABORT, -EINTR, "KWI"))
+						    RX_USER_ABORT, -EINTR,
+						    afs_abort_interrupted))
 				afs_set_call_complete(call, -EINTR, 0);
 		}
 	}
@@ -836,7 +843,8 @@ void afs_send_empty_reply(struct afs_call *call)
 	case -ENOMEM:
 		_debug("oom");
 		rxrpc_kernel_abort_call(net->socket, call->rxcall,
-					RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					afs_abort_oom);
 		fallthrough;
 	default:
 		_leave(" [error]");
@@ -878,7 +886,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len)
 	if (n == -ENOMEM) {
 		_debug("oom");
 		rxrpc_kernel_abort_call(net->socket, call->rxcall,
-					RXGEN_SS_MARSHAL, -ENOMEM, "KOO");
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					afs_abort_oom);
 	}
 	_leave(" [error]");
 }
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index d5a5ae926380..ba717eac0229 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -15,6 +15,7 @@ struct key;
 struct sock;
 struct socket;
 struct rxrpc_call;
+enum rxrpc_abort_reason;
 
 enum rxrpc_interruptibility {
 	RXRPC_INTERRUPTIBLE,	/* Call is interruptible */
@@ -55,7 +56,7 @@ int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *,
 int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *,
 			   struct iov_iter *, size_t *, bool, u32 *, u16 *);
 bool rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *,
-			     u32, int, const char *);
+			     u32, int, enum rxrpc_abort_reason);
 void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *);
 void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *,
 			   struct sockaddr_rxrpc *);
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 222d0498d23f..caeabd50e049 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -16,6 +16,104 @@
 /*
  * Declare tracing information enums and their string mappings for display.
  */
+#define rxrpc_abort_reasons \
+	/* AFS errors */						\
+	EM(afs_abort_general_error,		"afs-error")		\
+	EM(afs_abort_interrupted,		"afs-intr")		\
+	EM(afs_abort_oom,			"afs-oom")		\
+	EM(afs_abort_op_not_supported,		"afs-op-notsupp")	\
+	EM(afs_abort_probeuuid_negative,	"afs-probeuuid-neg")	\
+	EM(afs_abort_send_data_error,		"afs-send-data")	\
+	EM(afs_abort_unmarshal_error,		"afs-unmarshal")	\
+	/* rxperf errors */						\
+	EM(rxperf_abort_general_error,		"rxperf-error")		\
+	EM(rxperf_abort_oom,			"rxperf-oom")		\
+	EM(rxperf_abort_op_not_supported,	"rxperf-op-notsupp")	\
+	EM(rxperf_abort_unmarshal_error,	"rxperf-unmarshal")	\
+	/* RxKAD security errors */					\
+	EM(rxkad_abort_1_short_check,		"rxkad1-short-check")	\
+	EM(rxkad_abort_1_short_data,		"rxkad1-short-data")	\
+	EM(rxkad_abort_1_short_encdata,		"rxkad1-short-encdata")	\
+	EM(rxkad_abort_1_short_header,		"rxkad1-short-hdr")	\
+	EM(rxkad_abort_2_short_check,		"rxkad2-short-check")	\
+	EM(rxkad_abort_2_short_data,		"rxkad2-short-data")	\
+	EM(rxkad_abort_2_short_header,		"rxkad2-short-hdr")	\
+	EM(rxkad_abort_2_short_len,		"rxkad2-short-len")	\
+	EM(rxkad_abort_bad_checksum,		"rxkad2-bad-cksum")	\
+	EM(rxkad_abort_chall_key_expired,	"rxkad-chall-key-exp")	\
+	EM(rxkad_abort_chall_level,		"rxkad-chall-level")	\
+	EM(rxkad_abort_chall_no_key,		"rxkad-chall-nokey")	\
+	EM(rxkad_abort_chall_short,		"rxkad-chall-short")	\
+	EM(rxkad_abort_chall_version,		"rxkad-chall-version")	\
+	EM(rxkad_abort_resp_bad_callid,		"rxkad-resp-bad-callid") \
+	EM(rxkad_abort_resp_bad_checksum,	"rxkad-resp-bad-cksum")	\
+	EM(rxkad_abort_resp_bad_param,		"rxkad-resp-bad-param")	\
+	EM(rxkad_abort_resp_call_ctr,		"rxkad-resp-call-ctr") \
+	EM(rxkad_abort_resp_call_state,		"rxkad-resp-call-state") \
+	EM(rxkad_abort_resp_key_expired,	"rxkad-resp-key-exp")	\
+	EM(rxkad_abort_resp_key_rejected,	"rxkad-resp-key-rej")	\
+	EM(rxkad_abort_resp_level,		"rxkad-resp-level")	\
+	EM(rxkad_abort_resp_nokey,		"rxkad-resp-nokey")	\
+	EM(rxkad_abort_resp_ooseq,		"rxkad-resp-ooseq")	\
+	EM(rxkad_abort_resp_short,		"rxkad-resp-short")	\
+	EM(rxkad_abort_resp_short_tkt,		"rxkad-resp-short-tkt")	\
+	EM(rxkad_abort_resp_tkt_aname,		"rxkad-resp-tk-aname")	\
+	EM(rxkad_abort_resp_tkt_expired,	"rxkad-resp-tk-exp")	\
+	EM(rxkad_abort_resp_tkt_future,		"rxkad-resp-tk-future")	\
+	EM(rxkad_abort_resp_tkt_inst,		"rxkad-resp-tk-inst")	\
+	EM(rxkad_abort_resp_tkt_len,		"rxkad-resp-tk-len")	\
+	EM(rxkad_abort_resp_tkt_realm,		"rxkad-resp-tk-realm")	\
+	EM(rxkad_abort_resp_tkt_short,		"rxkad-resp-tk-short")	\
+	EM(rxkad_abort_resp_tkt_sinst,		"rxkad-resp-tk-sinst")	\
+	EM(rxkad_abort_resp_tkt_sname,		"rxkad-resp-tk-sname")	\
+	EM(rxkad_abort_resp_unknown_tkt,	"rxkad-resp-unknown-tkt") \
+	EM(rxkad_abort_resp_version,		"rxkad-resp-version")	\
+	/* rxrpc errors */						\
+	EM(rxrpc_abort_call_improper_term,	"call-improper-term")	\
+	EM(rxrpc_abort_call_reset,		"call-reset")		\
+	EM(rxrpc_abort_call_sendmsg,		"call-sendmsg")		\
+	EM(rxrpc_abort_call_sock_release,	"call-sock-rel")	\
+	EM(rxrpc_abort_call_sock_release_tba,	"call-sock-rel-tba")	\
+	EM(rxrpc_abort_call_timeout,		"call-timeout")		\
+	EM(rxrpc_abort_no_service_key,		"no-serv-key")		\
+	EM(rxrpc_abort_nomem,			"nomem")		\
+	EM(rxrpc_abort_service_not_offered,	"serv-not-offered")	\
+	EM(rxrpc_abort_shut_down,		"shut-down")		\
+	EM(rxrpc_abort_unsupported_security,	"unsup-sec")		\
+	EM(rxrpc_badmsg_bad_abort,		"bad-abort")		\
+	EM(rxrpc_badmsg_bad_jumbo,		"bad-jumbo")		\
+	EM(rxrpc_badmsg_short_ack,		"short-ack")		\
+	EM(rxrpc_badmsg_short_ack_info,		"short-ack-info")	\
+	EM(rxrpc_badmsg_short_hdr,		"short-hdr")		\
+	EM(rxrpc_badmsg_unsupported_packet,	"unsup-pkt")		\
+	EM(rxrpc_badmsg_zero_call,		"zero-call")		\
+	EM(rxrpc_badmsg_zero_seq,		"zero-seq")		\
+	EM(rxrpc_badmsg_zero_service,		"zero-service")		\
+	EM(rxrpc_eproto_ackr_outside_window,	"ackr-out-win")		\
+	EM(rxrpc_eproto_ackr_sack_overflow,	"ackr-sack-over")	\
+	EM(rxrpc_eproto_ackr_short_sack,	"ackr-short-sack")	\
+	EM(rxrpc_eproto_ackr_zero,		"ackr-zero")		\
+	EM(rxrpc_eproto_bad_upgrade,		"bad-upgrade")		\
+	EM(rxrpc_eproto_data_after_last,	"data-after-last")	\
+	EM(rxrpc_eproto_different_last,		"diff-last")		\
+	EM(rxrpc_eproto_early_reply,		"early-reply")		\
+	EM(rxrpc_eproto_improper_term,		"improper-term")	\
+	EM(rxrpc_eproto_no_client_call,		"no-cl-call")		\
+	EM(rxrpc_eproto_no_client_conn,		"no-cl-conn")		\
+	EM(rxrpc_eproto_no_service_call,	"no-sv-call")		\
+	EM(rxrpc_eproto_reupgrade,		"re-upgrade")		\
+	EM(rxrpc_eproto_rxnull_challenge,	"rxnull-chall")		\
+	EM(rxrpc_eproto_rxnull_response,	"rxnull-resp")		\
+	EM(rxrpc_eproto_tx_rot_last,		"tx-rot-last")		\
+	EM(rxrpc_eproto_unexpected_ack,		"unex-ack")		\
+	EM(rxrpc_eproto_unexpected_ackall,	"unex-ackall")		\
+	EM(rxrpc_eproto_unexpected_implicit_end, "unex-impl-end")	\
+	EM(rxrpc_eproto_unexpected_reply,	"unex-reply")		\
+	EM(rxrpc_eproto_wrong_security,		"wrong-sec")		\
+	EM(rxrpc_recvmsg_excess_data,		"recvmsg-excess")	\
+	EM(rxrpc_recvmsg_short_data,		"recvmsg-short")	\
+	E_(rxrpc_sendmsg_late_send,		"sendmsg-late")
+
 #define rxrpc_call_poke_traces \
 	EM(rxrpc_call_poke_abort,		"Abort")	\
 	EM(rxrpc_call_poke_complete,		"Compl")	\
@@ -382,6 +480,7 @@
 #define EM(a, b) a,
 #define E_(a, b) a
 
+enum rxrpc_abort_reason		{ rxrpc_abort_reasons } __mode(byte);
 enum rxrpc_bundle_trace		{ rxrpc_bundle_traces } __mode(byte);
 enum rxrpc_call_poke_trace	{ rxrpc_call_poke_traces } __mode(byte);
 enum rxrpc_call_trace		{ rxrpc_call_traces } __mode(byte);
@@ -410,9 +509,13 @@ enum rxrpc_txqueue_trace	{ rxrpc_txqueue_traces } __mode(byte);
  */
 #undef EM
 #undef E_
+
+#ifndef RXRPC_TRACE_ONLY_DEFINE_ENUMS
+
 #define EM(a, b) TRACE_DEFINE_ENUM(a);
 #define E_(a, b) TRACE_DEFINE_ENUM(a);
 
+rxrpc_abort_reasons;
 rxrpc_bundle_traces;
 rxrpc_call_poke_traces;
 rxrpc_call_traces;
@@ -663,14 +766,14 @@ TRACE_EVENT(rxrpc_rx_done,
 	    );
 
 TRACE_EVENT(rxrpc_abort,
-	    TP_PROTO(unsigned int call_nr, const char *why, u32 cid, u32 call_id,
-		     rxrpc_seq_t seq, int abort_code, int error),
+	    TP_PROTO(unsigned int call_nr, enum rxrpc_abort_reason why,
+		     u32 cid, u32 call_id, rxrpc_seq_t seq, int abort_code, int error),
 
 	    TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error),
 
 	    TP_STRUCT__entry(
 		    __field(unsigned int,		call_nr		)
-		    __array(char,			why, 4		)
+		    __field(enum rxrpc_abort_reason,	why		)
 		    __field(u32,			cid		)
 		    __field(u32,			call_id		)
 		    __field(rxrpc_seq_t,		seq		)
@@ -679,8 +782,8 @@ TRACE_EVENT(rxrpc_abort,
 			     ),
 
 	    TP_fast_assign(
-		    memcpy(__entry->why, why, 4);
 		    __entry->call_nr = call_nr;
+		    __entry->why = why;
 		    __entry->cid = cid;
 		    __entry->call_id = call_id;
 		    __entry->abort_code = abort_code;
@@ -691,7 +794,8 @@ TRACE_EVENT(rxrpc_abort,
 	    TP_printk("c=%08x %08x:%08x s=%u a=%d e=%d %s",
 		      __entry->call_nr,
 		      __entry->cid, __entry->call_id, __entry->seq,
-		      __entry->abort_code, __entry->error, __entry->why)
+		      __entry->abort_code, __entry->error,
+		      __print_symbolic(__entry->why, rxrpc_abort_reasons))
 	    );
 
 TRACE_EVENT(rxrpc_call_complete,
@@ -1527,30 +1631,6 @@ TRACE_EVENT(rxrpc_improper_term,
 		      __entry->abort_code)
 	    );
 
-TRACE_EVENT(rxrpc_rx_eproto,
-	    TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial,
-		     const char *why),
-
-	    TP_ARGS(call, serial, why),
-
-	    TP_STRUCT__entry(
-		    __field(unsigned int,		call		)
-		    __field(rxrpc_serial_t,		serial		)
-		    __field(const char *,		why		)
-			     ),
-
-	    TP_fast_assign(
-		    __entry->call = call ? call->debug_id : 0;
-		    __entry->serial = serial;
-		    __entry->why = why;
-			   ),
-
-	    TP_printk("c=%08x EPROTO %08x %s",
-		      __entry->call,
-		      __entry->serial,
-		      __entry->why)
-	    );
-
 TRACE_EVENT(rxrpc_connect_call,
 	    TP_PROTO(struct rxrpc_call *call),
 
@@ -1848,6 +1928,8 @@ TRACE_EVENT(rxrpc_call_poked,
 
 #undef EM
 #undef E_
+
+#endif /* RXRPC_TRACE_ONLY_DEFINE_ENUMS */
 #endif /* _TRACE_RXRPC_H */
 
 /* This part must be outside protection */
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 78bd6fb0bc15..120ce3ccbb22 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -627,9 +627,10 @@ struct rxrpc_call {
 	unsigned long		events;
 	spinlock_t		notify_lock;	/* Kernel notification lock */
 	rwlock_t		state_lock;	/* lock for state transition */
-	const char		*send_abort_why; /* String indicating why the abort was sent */
+	unsigned int		send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */
 	s32			send_abort;	/* Abort code to be sent */
 	short			send_abort_err;	/* Error to be associated with the abort */
+	rxrpc_seq_t		send_abort_seq;	/* DATA packet that incurred the abort (or 0) */
 	s32			abort_code;	/* Local/remote abort code */
 	int			error;		/* Local error incurred */
 	enum rxrpc_call_state	state;		/* current state of call */
@@ -818,9 +819,11 @@ extern struct workqueue_struct *rxrpc_workqueue;
  */
 int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
 void rxrpc_discard_prealloc(struct rxrpc_sock *);
-int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
-			    struct rxrpc_connection *, struct sockaddr_rxrpc *,
-			    struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+			     struct rxrpc_peer *peer,
+			     struct rxrpc_connection *conn,
+			     struct sockaddr_rxrpc *peer_srx,
+			     struct sk_buff *skb);
 void rxrpc_accept_incoming_calls(struct rxrpc_local *);
 int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
 
@@ -840,7 +843,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
 			     unsigned long now,
 			     enum rxrpc_timer_trace why);
 
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
 
 /*
  * call_object.c
@@ -905,10 +908,10 @@ void rxrpc_clean_up_local_conns(struct rxrpc_local *);
 void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
 				unsigned int channel);
 int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
-		     s32 abort_code, int err, const char *why);
+		     s32 abort_code, int err, enum rxrpc_abort_reason why);
 void rxrpc_process_connection(struct work_struct *);
 void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
 void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
 
 static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
@@ -979,12 +982,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
  */
 int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
 void rxrpc_error_report(struct sock *);
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+			s32 abort_code, int err);
 int rxrpc_io_thread(void *data);
 static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
 {
 	wake_up_process(local->io_thread);
 }
 
+static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+	return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO);
+}
+
 /*
  * insecure.c
  */
@@ -1108,29 +1118,26 @@ bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion
 bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
 bool __rxrpc_call_completed(struct rxrpc_call *);
 bool rxrpc_call_completed(struct rxrpc_call *);
-bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
-bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
+bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+			u32 abort_code, int error, enum rxrpc_abort_reason why);
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+		      u32 abort_code, int error, enum rxrpc_abort_reason why);
 int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
 
 /*
  * Abort a call due to a protocol error.
  */
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
-					struct sk_buff *skb,
-					const char *eproto_why,
-					const char *why,
-					u32 abort_code)
+static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
+				     struct sk_buff *skb,
+				     s32 abort_code,
+				     enum rxrpc_abort_reason why)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
-	trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
-	return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+	rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why);
+	return -EPROTO;
 }
 
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
-	__rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
-			     (abort_why), (abort_code))
-
 /*
  * rtt.c
  */
@@ -1162,8 +1169,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
 /*
  * sendmsg.c
  */
-bool rxrpc_propose_abort(struct rxrpc_call *call,
-			 u32 abort_code, int error, const char *why);
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+			 enum rxrpc_abort_reason why);
 int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
 
 /*
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index c957e4415cdc..a132d486dea0 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
  * If we want to report an error, we mark the skb with the packet type and
  * abort code and return false.
  */
-int rxrpc_new_incoming_call(struct rxrpc_local *local,
-			    struct rxrpc_peer *peer,
-			    struct rxrpc_connection *conn,
-			    struct sockaddr_rxrpc *peer_srx,
-			    struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+			     struct rxrpc_peer *peer,
+			     struct rxrpc_connection *conn,
+			     struct sockaddr_rxrpc *peer_srx,
+			     struct sk_buff *skb)
 {
 	const struct rxrpc_security *sec = NULL;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -339,10 +339,9 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
 
 	_enter("");
 
-	/* Don't set up a call for anything other than the first DATA packet. */
-	if (sp->hdr.seq != 1 ||
-	    sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
-		return 0; /* Just discard */
+	/* Don't set up a call for anything other than a DATA packet. */
+	if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+		return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
 
 	rcu_read_lock();
 
@@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
 	if (!conn) {
 		sec = rxrpc_get_incoming_security(rx, skb);
 		if (!sec)
-			goto reject;
+			goto unsupported_security;
 	}
 
 	spin_lock(&rx->incoming_lock);
 	if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
 	    rx->sk.sk_state == RXRPC_CLOSE) {
-		trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
-				  sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
-		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-		skb->priority = RX_INVALID_OPERATION;
+		rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
+				   RX_INVALID_OPERATION, -ESHUTDOWN);
 		goto no_call;
 	}
 
@@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
 	_leave(" = %p{%d}", call, call->debug_id);
 	rxrpc_input_call_event(call, skb);
 	rxrpc_put_call(call, rxrpc_call_put_input);
-	return 0;
+	return true;
 
 unsupported_service:
-	trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-			  RX_INVALID_OPERATION, EOPNOTSUPP);
-	skb->priority = RX_INVALID_OPERATION;
-	goto reject;
+	rcu_read_unlock();
+	return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+				  RX_INVALID_OPERATION, -EOPNOTSUPP);
+unsupported_security:
+	rcu_read_unlock();
+	return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+				  RX_INVALID_OPERATION, -EKEYREJECTED);
 no_call:
 	spin_unlock(&rx->incoming_lock);
-reject:
 	rcu_read_unlock();
 	_leave(" = f [%u]", skb->mark);
-	return -EPROTO;
+	return false;
 discard:
 	rcu_read_unlock();
-	return 0;
+	return true;
 }
 
 /*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b2fc3fa686ec..695aeb70d1a6 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -333,7 +333,7 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
 /*
  * Handle retransmission and deferred ACK/abort generation.
  */
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 {
 	unsigned long now, next, t;
 	rxrpc_serial_t ackr_serial;
@@ -352,8 +352,8 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 	/* Handle abort request locklessly, vs rxrpc_propose_abort(). */
 	abort_code = smp_load_acquire(&call->send_abort);
 	if (abort_code) {
-		rxrpc_abort_call(call->send_abort_why, call, 0, call->send_abort,
-				 call->send_abort_err);
+		rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
+				 call->send_abort_why);
 		goto out;
 	}
 
@@ -440,9 +440,11 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
 		if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
 		    (int)call->conn->hi_serial - (int)call->rx_serial > 0) {
 			trace_rxrpc_call_reset(call);
-			rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
+			rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+					 rxrpc_abort_call_reset);
 		} else {
-			rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+			rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+					 rxrpc_abort_call_timeout);
 		}
 		goto out;
 	}
@@ -494,4 +496,5 @@ out:
 	if (call->acks_hard_ack != call->tx_bottom)
 		rxrpc_shrink_call_tx_buffer(call);
 	_leave("");
+	return true;
 }
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 666430182dfd..705f6e26cc75 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -581,7 +581,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 		call = list_entry(rx->to_be_accepted.next,
 				  struct rxrpc_call, accept_link);
 		list_del(&call->accept_link);
-		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKR");
+		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+				    rxrpc_abort_call_sock_release_tba);
 		rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
 	}
 
@@ -589,7 +590,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
 		call = list_entry(rx->sock_calls.next,
 				  struct rxrpc_call, sock_link);
 		rxrpc_get_call(call, rxrpc_call_get_release_sock);
-		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET, "SKT");
+		rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+				    rxrpc_abort_call_sock_release);
 		rxrpc_release_call(rx, call);
 		rxrpc_put_call(call, rxrpc_call_put_release_sock);
 	}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 753d91a9646f..485d7f0fed2c 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -47,7 +47,7 @@ static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff
  * Mark a socket buffer to indicate that the connection it's on should be aborted.
  */
 int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
-		     s32 abort_code, int err, const char *why)
+		     s32 abort_code, int err, enum rxrpc_abort_reason why)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
@@ -288,8 +288,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 		return 0;
 
 	default:
-		trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
-				      tracepoint_string("bad_conn_pkt"));
+		WARN_ON_ONCE(1);
 		return -EPROTO;
 	}
 }
@@ -300,7 +299,8 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 static void rxrpc_secure_connection(struct rxrpc_connection *conn)
 {
 	if (conn->security->issue_challenge(conn) < 0)
-		rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM, "OOM");
+		rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM,
+				 rxrpc_abort_nomem);
 }
 
 /*
@@ -405,14 +405,14 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 /*
  * Input a connection-level packet.
  */
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_BUSY:
 		/* Just ignore BUSY packets for now. */
-		return 0;
+		return true;
 
 	case RXRPC_PACKET_TYPE_ABORT:
 		if (rxrpc_is_conn_aborted(conn))
@@ -429,12 +429,11 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
 			return true;
 		}
 		rxrpc_post_packet_to_conn(conn, skb);
-		return 0;
+		return true;
 
 	default:
-		trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
-				      tracepoint_string("bad_conn_pkt"));
-		return -EPROTO;
+		WARN_ON_ONCE(1);
+		return true;
 	}
 }
 
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index bb4beb445325..bd69ff2d9082 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,10 +9,10 @@
 
 #include "ar-internal.h"
 
-static void rxrpc_proto_abort(const char *why,
-			      struct rxrpc_call *call, rxrpc_seq_t seq)
+static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
+			      enum rxrpc_abort_reason why)
 {
-	rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG);
+	rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why);
 }
 
 /*
@@ -249,8 +249,8 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
  * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
  * or a final ACK packet.
  */
-static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
-			       const char *abort_why)
+static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+			       enum rxrpc_abort_reason abort_why)
 {
 	unsigned int state;
 
@@ -283,13 +283,12 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
 	else
 		trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
 	_leave(" = ok");
-	return true;
+	return;
 
 bad_state:
 	write_unlock(&call->state_lock);
 	kdebug("end_tx %s", rxrpc_call_states[call->state]);
-	rxrpc_proto_abort(abort_why, call, call->tx_top);
-	return false;
+	rxrpc_proto_abort(call, call->tx_top, abort_why);
 }
 
 /*
@@ -311,11 +310,13 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
 
 	if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
 		if (!rxrpc_rotate_tx_window(call, top, &summary)) {
-			rxrpc_proto_abort("TXL", call, top);
+			rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
 			return false;
 		}
 	}
-	return rxrpc_end_tx_phase(call, true, "ETD");
+
+	rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply);
+	return true;
 }
 
 static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
@@ -365,17 +366,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
 
 	if (last) {
 		if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
-		    seq + 1 != wtop) {
-			rxrpc_proto_abort("LSN", call, seq);
-			return;
-		}
+		    seq + 1 != wtop)
+			return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last);
 	} else {
 		if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
 		    after_eq(seq, wtop)) {
 			pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
 				call->debug_id, seq, window, wtop, wlimit);
-			rxrpc_proto_abort("LSA", call, seq);
-			return;
+			return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last);
 		}
 	}
 
@@ -583,7 +581,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
 		goto out_notify;
 
 	if (!rxrpc_input_split_jumbo(call, skb)) {
-		rxrpc_proto_abort("VLD", call, sp->hdr.seq);
+		rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo);
 		goto out_notify;
 	}
 	skb = NULL;
@@ -764,7 +762,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
 
 	offset = sizeof(struct rxrpc_wire_header);
 	if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
-		return rxrpc_proto_abort("XAK", call, 0);
+		return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
 	offset += sizeof(ack);
 
 	ack_serial = sp->hdr.serial;
@@ -844,7 +842,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
 	ioffset = offset + nr_acks + 3;
 	if (skb->len >= ioffset + sizeof(info) &&
 	    skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
-		return rxrpc_proto_abort("XAI", call, 0);
+		return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
 
 	if (nr_acks > 0)
 		skb_condense(skb);
@@ -867,7 +865,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
 		rxrpc_input_ackinfo(call, skb, &info);
 
 	if (first_soft_ack == 0)
-		return rxrpc_proto_abort("AK0", call, 0);
+		return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
 
 	/* Ignore ACKs unless we are or have just been transmitting. */
 	switch (READ_ONCE(call->state)) {
@@ -882,20 +880,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
 
 	if (before(hard_ack, call->acks_hard_ack) ||
 	    after(hard_ack, call->tx_top))
-		return rxrpc_proto_abort("AKW", call, 0);
+		return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
 	if (nr_acks > call->tx_top - hard_ack)
-		return rxrpc_proto_abort("AKN", call, 0);
+		return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
 
 	if (after(hard_ack, call->acks_hard_ack)) {
 		if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
-			rxrpc_end_tx_phase(call, false, "ETA");
+			rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
 			return;
 		}
 	}
 
 	if (nr_acks > 0) {
 		if (offset > (int)skb->len - nr_acks)
-			return rxrpc_proto_abort("XSA", call, 0);
+			return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
 		rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
 				      nr_acks, &summary);
 	}
@@ -917,7 +915,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
 	struct rxrpc_ack_summary summary = { 0 };
 
 	if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
-		rxrpc_end_tx_phase(call, false, "ETL");
+		rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
 }
 
 /*
@@ -962,27 +960,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
 
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_DATA:
-		rxrpc_input_data(call, skb);
-		break;
+		return rxrpc_input_data(call, skb);
 
 	case RXRPC_PACKET_TYPE_ACK:
-		rxrpc_input_ack(call, skb);
-		break;
+		return rxrpc_input_ack(call, skb);
 
 	case RXRPC_PACKET_TYPE_BUSY:
 		/* Just ignore BUSY packets from the server; the retry and
 		 * lifespan timers will take care of business.  BUSY packets
 		 * from the client don't make sense.
 		 */
-		break;
+		return;
 
 	case RXRPC_PACKET_TYPE_ABORT:
-		rxrpc_input_abort(call, skb);
-		break;
+		return rxrpc_input_abort(call, skb);
 
 	case RXRPC_PACKET_TYPE_ACKALL:
-		rxrpc_input_ackall(call, skb);
-		break;
+		return rxrpc_input_ackall(call, skb);
 
 	default:
 		break;
@@ -1004,7 +998,8 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
 	case RXRPC_CALL_COMPLETE:
 		break;
 	default:
-		rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN);
+		rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN,
+				 rxrpc_eproto_improper_term);
 		trace_rxrpc_improper_term(call);
 		break;
 	}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 29dcc7d3f51a..34353b6e584b 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -45,13 +45,15 @@ static void none_free_call_crypto(struct rxrpc_call *call)
 static int none_respond_to_challenge(struct rxrpc_connection *conn,
 				     struct sk_buff *skb)
 {
-	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN");
+	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+				rxrpc_eproto_rxnull_challenge);
 }
 
 static int none_verify_response(struct rxrpc_connection *conn,
 				struct sk_buff *skb)
 {
-	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO, "RXN");
+	return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+				rxrpc_eproto_rxnull_response);
 }
 
 static void none_clear(struct rxrpc_connection *conn)
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 46e58cf5bc96..33fd2394c8b3 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -66,10 +66,32 @@ void rxrpc_error_report(struct sock *sk)
 	rcu_read_unlock();
 }
 
+/*
+ * Directly produce an abort from a packet.
+ */
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+			s32 abort_code, int err)
+{
+	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+	trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+			  abort_code, err);
+	skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+	skb->priority = abort_code;
+	return false;
+}
+
+static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+	return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
+}
+
+#define just_discard true
+
 /*
  * Process event packets targeted at a local endpoint.
  */
-static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	char v;
@@ -81,22 +103,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
 		if (v == 0)
 			rxrpc_send_version_request(local, &sp->hdr, skb);
 	}
+
+	return true;
 }
 
 /*
  * Extract the wire header from a packet and translate the byte order.
  */
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
+				 struct sk_buff *skb)
 {
 	struct rxrpc_wire_header whdr;
 
 	/* dig out the RxRPC connection details */
-	if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
-		trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
-				      tracepoint_string("bad_hdr"));
-		return -EBADMSG;
-	}
+	if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+		return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr);
 
 	memset(sp, 0, sizeof(*sp));
 	sp->hdr.epoch		= ntohl(whdr.epoch);
@@ -110,7 +131,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
 	sp->hdr.securityIndex	= whdr.securityIndex;
 	sp->hdr._rsvd		= ntohs(whdr._rsvd);
 	sp->hdr.serviceId	= ntohs(whdr.serviceId);
-	return 0;
+	return true;
 }
 
 /*
@@ -130,28 +151,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb)
 /*
  * Process packets received on the local endpoint
  */
-static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 {
 	struct rxrpc_connection *conn;
 	struct sockaddr_rxrpc peer_srx;
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_peer *peer = NULL;
 	struct sk_buff *skb = *_skb;
-	int ret = 0;
+	bool ret = false;
 
 	skb_pull(skb, sizeof(struct udphdr));
 
 	sp = rxrpc_skb(skb);
 
 	/* dig out the RxRPC connection details */
-	if (rxrpc_extract_header(sp, skb) < 0)
-		goto bad_message;
+	if (!rxrpc_extract_header(sp, skb))
+		return just_discard;
 
 	if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
 		static int lose;
 		if ((lose++ & 7) == 7) {
 			trace_rxrpc_rx_lose(sp);
-			return 0;
+			return just_discard;
 		}
 	}
 
@@ -160,28 +181,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 	switch (sp->hdr.type) {
 	case RXRPC_PACKET_TYPE_VERSION:
 		if (rxrpc_to_client(sp))
-			return 0;
-		rxrpc_input_version(local, skb);
-		return 0;
+			return just_discard;
+		return rxrpc_input_version(local, skb);
 
 	case RXRPC_PACKET_TYPE_BUSY:
 		if (rxrpc_to_server(sp))
-			return 0;
+			return just_discard;
 		fallthrough;
 	case RXRPC_PACKET_TYPE_ACK:
 	case RXRPC_PACKET_TYPE_ACKALL:
 		if (sp->hdr.callNumber == 0)
-			goto bad_message;
+			return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
 		break;
 	case RXRPC_PACKET_TYPE_ABORT:
 		if (!rxrpc_extract_abort(skb))
-			return 0; /* Just discard if malformed */
+			return just_discard; /* Just discard if malformed */
 		break;
 
 	case RXRPC_PACKET_TYPE_DATA:
-		if (sp->hdr.callNumber == 0 ||
-		    sp->hdr.seq == 0)
-			goto bad_message;
+		if (sp->hdr.callNumber == 0)
+			return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+		if (sp->hdr.seq == 0)
+			return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq);
 
 		/* Unshare the packet so that it can be modified for in-place
 		 * decryption.
@@ -191,7 +212,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 			if (!skb) {
 				rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
 				*_skb = NULL;
-				return 0;
+				return just_discard;
 			}
 
 			if (skb != *_skb) {
@@ -205,28 +226,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 
 	case RXRPC_PACKET_TYPE_CHALLENGE:
 		if (rxrpc_to_server(sp))
-			return 0;
+			return just_discard;
 		break;
 	case RXRPC_PACKET_TYPE_RESPONSE:
 		if (rxrpc_to_client(sp))
-			return 0;
+			return just_discard;
 		break;
 
 		/* Packet types 9-11 should just be ignored. */
 	case RXRPC_PACKET_TYPE_PARAMS:
 	case RXRPC_PACKET_TYPE_10:
 	case RXRPC_PACKET_TYPE_11:
-		return 0;
+		return just_discard;
 
 	default:
-		goto bad_message;
+		return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet);
 	}
 
 	if (sp->hdr.serviceId == 0)
-		goto bad_message;
+		return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service);
 
 	if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
-		return true; /* Unsupported address type - discard. */
+		return just_discard; /* Unsupported address type. */
 
 	if (peer_srx.transport.family != local->srx.transport.family &&
 	    (peer_srx.transport.family == AF_INET &&
@@ -234,7 +255,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 		pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
 				    peer_srx.transport.family,
 				    local->srx.transport.family);
-		return true; /* Wrong address type - discard. */
+		return just_discard; /* Wrong address type. */
 	}
 
 	if (rxrpc_to_client(sp)) {
@@ -242,12 +263,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 		conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
 		conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
 		rcu_read_unlock();
-		if (!conn) {
-			trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
-					  sp->hdr.callNumber, sp->hdr.seq,
-					  RXKADINCONSISTENCY, EBADMSG);
-			goto protocol_error;
-		}
+		if (!conn)
+			return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn);
 
 		ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
 		rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
@@ -280,19 +297,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
 
 	ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
 	rxrpc_put_peer(peer, rxrpc_peer_put_input);
-	if (ret < 0)
-		goto reject_packet;
-	return 0;
-
-bad_message:
-	trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-			  RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
-	skb->priority = RX_PROTOCOL_ERROR;
-	skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
-	rxrpc_reject_packet(local, skb);
-	return 0;
+	return ret;
 }
 
 /*
@@ -306,21 +311,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 	struct rxrpc_channel *chan;
 	struct rxrpc_call *call = NULL;
 	unsigned int channel;
+	bool ret;
 
 	if (sp->hdr.securityIndex != conn->security_ix)
-		goto wrong_security;
+		return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
+					  RXKADINCONSISTENCY, -EBADMSG);
 
 	if (sp->hdr.serviceId != conn->service_id) {
 		int old_id;
 
 		if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
-			goto reupgrade;
+			return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade);
+
 		old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
 				 sp->hdr.serviceId);
-
 		if (old_id != conn->orig_service_id &&
 		    old_id != sp->hdr.serviceId)
-			goto reupgrade;
+			return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade);
 	}
 
 	if (after(sp->hdr.serial, conn->hi_serial))
@@ -336,19 +343,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 
 	/* Ignore really old calls */
 	if (sp->hdr.callNumber < chan->last_call)
-		return 0;
+		return just_discard;
 
 	if (sp->hdr.callNumber == chan->last_call) {
 		if (chan->call ||
 		    sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
-			return 0;
+			return just_discard;
 
 		/* For the previous service call, if completed successfully, we
 		 * discard all further packets.
 		 */
 		if (rxrpc_conn_is_service(conn) &&
 		    chan->last_type == RXRPC_PACKET_TYPE_ACK)
-			return 0;
+			return just_discard;
 
 		/* But otherwise we need to retransmit the final packet from
 		 * data cached in the connection record.
@@ -359,7 +366,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 					    sp->hdr.serial,
 					    sp->hdr.flags);
 		rxrpc_conn_retransmit_call(conn, skb, channel);
-		return 0;
+		return just_discard;
 	}
 
 	rcu_read_lock();
@@ -370,7 +377,8 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 	if (sp->hdr.callNumber > chan->call_id) {
 		if (rxrpc_to_client(sp)) {
 			rxrpc_put_call(call, rxrpc_call_put_input);
-			goto reject_packet;
+			return rxrpc_protocol_error(skb,
+						    rxrpc_eproto_unexpected_implicit_end);
 		}
 
 		if (call) {
@@ -382,38 +390,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 
 	if (!call) {
 		if (rxrpc_to_client(sp))
-			goto bad_message;
-		if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
-					    peer_srx, skb) == 0)
-			return 0;
-		goto reject_packet;
+			return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call);
+		return rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+					       peer_srx, skb);
 	}
 
-	rxrpc_input_call_event(call, skb);
+	ret = rxrpc_input_call_event(call, skb);
 	rxrpc_put_call(call, rxrpc_call_put_input);
-	return 0;
-
-wrong_security:
-	trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-			  RXKADINCONSISTENCY, EBADMSG);
-	skb->priority = RXKADINCONSISTENCY;
-	goto post_abort;
-
-reupgrade:
-	trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-			  RX_PROTOCOL_ERROR, EBADMSG);
-	goto protocol_error;
-
-bad_message:
-	trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-			  RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
-	skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
-	skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
-	rxrpc_reject_packet(conn->local, skb);
-	return 0;
+	return ret;
 }
 
 /*
@@ -470,7 +454,8 @@ int rxrpc_io_thread(void *data)
 			switch (skb->mark) {
 			case RXRPC_SKB_MARK_PACKET:
 				skb->priority = 0;
-				rxrpc_input_packet(local, &skb);
+				if (!rxrpc_input_packet(local, &skb))
+					rxrpc_reject_packet(local, skb);
 				trace_rxrpc_rx_done(skb->mark, skb->priority);
 				rxrpc_free_skb(skb, rxrpc_skb_put_input);
 				break;
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 8d5fe65f5951..59b521b82aec 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -117,8 +117,8 @@ bool rxrpc_call_completed(struct rxrpc_call *call)
 /*
  * Record that a call is locally aborted.
  */
-bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
-			rxrpc_seq_t seq, u32 abort_code, int error)
+bool __rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+			u32 abort_code, int error, enum rxrpc_abort_reason why)
 {
 	trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
 			  abort_code, error);
@@ -126,13 +126,13 @@ bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
 					   abort_code, error);
 }
 
-bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
-		      rxrpc_seq_t seq, u32 abort_code, int error)
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+		      u32 abort_code, int error, enum rxrpc_abort_reason why)
 {
 	bool ret;
 
 	write_lock(&call->state_lock);
-	ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
+	ret = __rxrpc_abort_call(call, seq, abort_code, error, why);
 	write_unlock(&call->state_lock);
 	if (ret && test_bit(RXRPC_CALL_EXPOSED, &call->flags))
 		rxrpc_send_abort_packet(call);
@@ -642,11 +642,15 @@ out:
 	return ret;
 
 short_data:
-	trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
+	trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
+			  call->cid, call->call_id, call->rx_consumed,
+			  0, -EBADMSG);
 	ret = -EBADMSG;
 	goto out;
 excess_data:
-	trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
+	trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
+			  call->cid, call->call_id, call->rx_consumed,
+			  0, -EMSGSIZE);
 	ret = -EMSGSIZE;
 	goto out;
 call_complete:
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 5d2fbc6ec3cf..e52cb8058156 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt iv;
 	struct scatterlist sg[16];
-	bool aborted;
 	u32 data_size, buf;
 	u16 check;
 	int ret;
 
 	_enter("");
 
-	if (sp->len < 8) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
-					     RXKADSEALEDINCON);
-		goto protocol_error;
-	}
+	if (sp->len < 8)
+		return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+					  rxkad_abort_1_short_header);
 
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
@@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	skcipher_request_zero(req);
 
 	/* Extract the decrypted packet length */
-	if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
-					     RXKADDATALEN);
-		goto protocol_error;
-	}
+	if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+		return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+					  rxkad_abort_1_short_encdata);
 	sp->offset += sizeof(sechdr);
 	sp->len    -= sizeof(sechdr);
 
@@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
 	check = buf >> 16;
 	check ^= seq ^ call->call_id;
 	check &= 0xffff;
-	if (check != 0) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
-					     RXKADSEALEDINCON);
-		goto protocol_error;
-	}
-
-	if (data_size > sp->len) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
-					     RXKADDATALEN);
-		goto protocol_error;
-	}
+	if (check != 0)
+		return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+					  rxkad_abort_1_short_check);
+	if (data_size > sp->len)
+		return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+					  rxkad_abort_1_short_data);
 	sp->len = data_size;
 
 	_leave(" = 0 [dlen=%x]", data_size);
 	return 0;
-
-protocol_error:
-	if (aborted)
-		rxrpc_send_abort_packet(call);
-	return -EPROTO;
 }
 
 /*
@@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt iv;
 	struct scatterlist _sg[4], *sg;
-	bool aborted;
 	u32 data_size, buf;
 	u16 check;
 	int nsg, ret;
 
 	_enter(",{%d}", sp->len);
 
-	if (sp->len < 8) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
-					     RXKADSEALEDINCON);
-		goto protocol_error;
-	}
+	if (sp->len < 8)
+		return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+					  rxkad_abort_2_short_header);
 
 	/* Decrypt the skbuff in-place.  TODO: We really want to decrypt
 	 * directly into the target buffer.
@@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	} else {
 		sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
 		if (!sg)
-			goto nomem;
+			return -ENOMEM;
 	}
 
 	sg_init_table(sg, nsg);
@@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 		kfree(sg);
 
 	/* Extract the decrypted packet length */
-	if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
-					     RXKADDATALEN);
-		goto protocol_error;
-	}
+	if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+		return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+					  rxkad_abort_2_short_len);
 	sp->offset += sizeof(sechdr);
 	sp->len    -= sizeof(sechdr);
 
@@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
 	check = buf >> 16;
 	check ^= seq ^ call->call_id;
 	check &= 0xffff;
-	if (check != 0) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
-					     RXKADSEALEDINCON);
-		goto protocol_error;
-	}
+	if (check != 0)
+		return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+					  rxkad_abort_2_short_check);
 
-	if (data_size > sp->len) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
-					     RXKADDATALEN);
-		goto protocol_error;
-	}
+	if (data_size > sp->len)
+		return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+					  rxkad_abort_2_short_data);
 
 	sp->len = data_size;
 	_leave(" = 0 [dlen=%x]", data_size);
 	return 0;
-
-protocol_error:
-	if (aborted)
-		rxrpc_send_abort_packet(call);
-	return -EPROTO;
-
-nomem:
-	_leave(" = -ENOMEM");
-	return -ENOMEM;
 }
 
 /*
@@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
 		__be32 buf[2];
 	} crypto __aligned(8);
 	rxrpc_seq_t seq = sp->hdr.seq;
-	bool aborted;
 	int ret;
 	u16 cksum;
 	u32 x, y;
@@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
 		cksum = 1; /* zero checksums are not permitted */
 
 	if (cksum != sp->hdr.cksum) {
-		aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
-					     RXKADSEALEDINCON);
-		goto protocol_error;
+		ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+					 rxkad_abort_bad_checksum);
+		goto out;
 	}
 
 	switch (call->conn->security_level) {
@@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
 		break;
 	}
 
+out:
 	skcipher_request_free(req);
 	return ret;
-
-protocol_error:
-	if (aborted)
-		rxrpc_send_abort_packet(call);
-	return -EPROTO;
 }
 
 /*
@@ -827,27 +789,24 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 	struct rxkad_challenge challenge;
 	struct rxkad_response *resp;
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	const char *eproto;
-	u32 version, nonce, min_level, abort_code;
-	int ret;
+	u32 version, nonce, min_level;
+	int ret = -EPROTO;
 
 	_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
 
-	eproto = tracepoint_string("chall_no_key");
-	abort_code = RX_PROTOCOL_ERROR;
 	if (!conn->key)
-		goto protocol_error;
+		return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+					rxkad_abort_chall_no_key);
 
-	abort_code = RXKADEXPIRED;
 	ret = key_validate(conn->key);
 	if (ret < 0)
-		goto other_error;
+		return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+					rxkad_abort_chall_key_expired);
 
-	eproto = tracepoint_string("chall_short");
-	abort_code = RXKADPACKETSHORT;
 	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
 			  &challenge, sizeof(challenge)) < 0)
-		goto protocol_error;
+		return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+					rxkad_abort_chall_short);
 
 	version = ntohl(challenge.version);
 	nonce = ntohl(challenge.nonce);
@@ -855,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 
 	trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
 
-	eproto = tracepoint_string("chall_ver");
-	abort_code = RXKADINCONSISTENCY;
 	if (version != RXKAD_VERSION)
-		goto protocol_error;
+		return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+					rxkad_abort_chall_version);
 
-	abort_code = RXKADLEVELFAIL;
-	ret = -EACCES;
 	if (conn->security_level < min_level)
-		goto other_error;
+		return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+					rxkad_abort_chall_level);
 
 	token = conn->key->payload.data[0];
 
@@ -892,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
 		ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
 	kfree(resp);
 	return ret;
-
-protocol_error:
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
-	ret = -EPROTO;
-other_error:
-	rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK");
-	return ret;
 }
 
 /*
@@ -912,16 +862,12 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 				time64_t *_expiry)
 {
 	struct skcipher_request *req;
-	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt iv, key;
 	struct scatterlist sg[1];
 	struct in_addr addr;
 	unsigned int life;
-	const char *eproto;
 	time64_t issue, now;
 	bool little_endian;
-	int ret;
-	u32 abort_code;
 	u8 *p, *q, *name, *end;
 
 	_enter("{%d},{%x}", conn->debug_id, key_serial(server_key));
@@ -933,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 
 	memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
 
-	ret = -ENOMEM;
 	req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS);
 	if (!req)
-		goto temporary_error;
+		return -ENOMEM;
 
 	sg_init_one(&sg[0], ticket, ticket_len);
 	skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -947,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	p = ticket;
 	end = p + ticket_len;
 
-#define Z(field)					\
-	({						\
-		u8 *__str = p;				\
-		eproto = tracepoint_string("rxkad_bad_"#field); \
-		q = memchr(p, 0, end - p);		\
-		if (!q || q - p > (field##_SZ))		\
-			goto bad_ticket;		\
-		for (; p < q; p++)			\
-			if (!isprint(*p))		\
-				goto bad_ticket;	\
-		p++;					\
-		__str;					\
+#define Z(field, fieldl)						\
+	({								\
+		u8 *__str = p;						\
+		q = memchr(p, 0, end - p);				\
+		if (!q || q - p > field##_SZ)				\
+			return rxrpc_abort_conn(			\
+				conn, skb, RXKADBADTICKET, -EPROTO,	\
+				rxkad_abort_resp_tkt_##fieldl);		\
+		for (; p < q; p++)					\
+			if (!isprint(*p))				\
+				return rxrpc_abort_conn(		\
+					conn, skb, RXKADBADTICKET, -EPROTO, \
+					rxkad_abort_resp_tkt_##fieldl);	\
+		p++;							\
+		__str;							\
 	})
 
 	/* extract the ticket flags */
@@ -967,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	p++;
 
 	/* extract the authentication name */
-	name = Z(ANAME);
+	name = Z(ANAME, aname);
 	_debug("KIV ANAME: %s", name);
 
 	/* extract the principal's instance */
-	name = Z(INST);
+	name = Z(INST, inst);
 	_debug("KIV INST : %s", name);
 
 	/* extract the principal's authentication domain */
-	name = Z(REALM);
+	name = Z(REALM, realm);
 	_debug("KIV REALM: %s", name);
 
-	eproto = tracepoint_string("rxkad_bad_len");
 	if (end - p < 4 + 8 + 4 + 2)
-		goto bad_ticket;
+		return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO,
+					rxkad_abort_resp_tkt_short);
 
 	/* get the IPv4 address of the entity that requested the ticket */
 	memcpy(&addr, p, sizeof(addr));
@@ -1012,37 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
 	_debug("KIV ISSUE: %llx [%llx]", issue, now);
 
 	/* check the ticket is in date */
-	if (issue > now) {
-		abort_code = RXKADNOAUTH;
-		ret = -EKEYREJECTED;
-		goto other_error;
-	}
-
-	if (issue < now - life) {
-		abort_code = RXKADEXPIRED;
-		ret = -EKEYEXPIRED;
-		goto other_error;
-	}
+	if (issue > now)
+		return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED,
+					rxkad_abort_resp_tkt_future);
+	if (issue < now - life)
+		return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED,
+					rxkad_abort_resp_tkt_expired);
 
 	*_expiry = issue + life;
 
 	/* get the service name */
-	name = Z(SNAME);
+	name = Z(SNAME, sname);
 	_debug("KIV SNAME: %s", name);
 
 	/* get the service instance name */
-	name = Z(INST);
+	name = Z(INST, sinst);
 	_debug("KIV SINST: %s", name);
 	return 0;
-
-bad_ticket:
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
-	abort_code = RXKADBADTICKET;
-	ret = -EPROTO;
-other_error:
-	return rxrpc_abort_conn(conn, skb, abort_code, ret, "RXK");
-temporary_error:
-	return ret;
 }
 
 /*
@@ -1089,10 +1023,9 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 	struct rxrpc_crypt session_key;
 	struct key *server_key;
-	const char *eproto;
 	time64_t expiry;
 	void *ticket;
-	u32 abort_code, version, kvno, ticket_len, level;
+	u32 version, kvno, ticket_len, level;
 	__be32 csum;
 	int ret, i;
 
@@ -1100,19 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 
 	server_key = rxrpc_look_up_server_security(conn, skb, 0, 0);
 	if (IS_ERR(server_key)) {
-		switch (PTR_ERR(server_key)) {
+		ret = PTR_ERR(server_key);
+		switch (ret) {
 		case -ENOKEY:
-			abort_code = RXKADUNKNOWNKEY;
-			break;
+			return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret,
+						rxkad_abort_resp_nokey);
 		case -EKEYEXPIRED:
-			abort_code = RXKADEXPIRED;
-			break;
+			return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+						rxkad_abort_resp_key_expired);
 		default:
-			abort_code = RXKADNOAUTH;
-			break;
+			return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret,
+						rxkad_abort_resp_key_rejected);
 		}
-		return rxrpc_abort_conn(conn, skb, abort_code,
-					PTR_ERR(server_key), "RXK");
 	}
 
 	ret = -ENOMEM;
@@ -1120,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	if (!response)
 		goto temporary_error;
 
-	eproto = tracepoint_string("rxkad_rsp_short");
-	abort_code = RXKADPACKETSHORT;
 	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
-			  response, sizeof(*response)) < 0)
+			  response, sizeof(*response)) < 0) {
+		rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+				 rxkad_abort_resp_short);
 		goto protocol_error;
+	}
 
 	version = ntohl(response->version);
 	ticket_len = ntohl(response->ticket_len);
@@ -1132,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 
 	trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
 
-	eproto = tracepoint_string("rxkad_rsp_ver");
-	abort_code = RXKADINCONSISTENCY;
-	if (version != RXKAD_VERSION)
+	if (version != RXKAD_VERSION) {
+		rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+				 rxkad_abort_resp_version);
 		goto protocol_error;
+	}
 
-	eproto = tracepoint_string("rxkad_rsp_tktlen");
-	abort_code = RXKADTICKETLEN;
-	if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+	if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) {
+		rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO,
+				 rxkad_abort_resp_tkt_len);
 		goto protocol_error;
+	}
 
-	eproto = tracepoint_string("rxkad_rsp_unkkey");
-	abort_code = RXKADUNKNOWNKEY;
-	if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+	if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) {
+		rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO,
+				 rxkad_abort_resp_unknown_tkt);
 		goto protocol_error;
+	}
 
 	/* extract the kerberos ticket and decrypt and decode it */
 	ret = -ENOMEM;
@@ -1153,12 +1089,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	if (!ticket)
 		goto temporary_error_free_resp;
 
-	eproto = tracepoint_string("rxkad_tkt_short");
-	abort_code = RXKADPACKETSHORT;
-	ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
-			    ticket, ticket_len);
-	if (ret < 0)
-		goto temporary_error_free_ticket;
+	if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
+			  ticket, ticket_len) < 0) {
+		rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+				 rxkad_abort_resp_short_tkt);
+		goto protocol_error;
+	}
 
 	ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
 				   &session_key, &expiry);
@@ -1169,56 +1105,66 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	 * response */
 	rxkad_decrypt_response(conn, response, &session_key);
 
-	eproto = tracepoint_string("rxkad_rsp_param");
-	abort_code = RXKADSEALEDINCON;
-	if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
-		goto protocol_error_free;
-	if (ntohl(response->encrypted.cid) != conn->proto.cid)
-		goto protocol_error_free;
-	if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
+	if (ntohl(response->encrypted.epoch) != conn->proto.epoch ||
+	    ntohl(response->encrypted.cid) != conn->proto.cid ||
+	    ntohl(response->encrypted.securityIndex) != conn->security_ix) {
+		rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+				 rxkad_abort_resp_bad_param);
 		goto protocol_error_free;
+	}
+
 	csum = response->encrypted.checksum;
 	response->encrypted.checksum = 0;
 	rxkad_calc_response_checksum(response);
-	eproto = tracepoint_string("rxkad_rsp_csum");
-	if (response->encrypted.checksum != csum)
+	if (response->encrypted.checksum != csum) {
+		rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+				 rxkad_abort_resp_bad_checksum);
 		goto protocol_error_free;
+	}
 
 	spin_lock(&conn->bundle->channel_lock);
 	for (i = 0; i < RXRPC_MAXCALLS; i++) {
 		struct rxrpc_call *call;
 		u32 call_id = ntohl(response->encrypted.call_id[i]);
 
-		eproto = tracepoint_string("rxkad_rsp_callid");
-		if (call_id > INT_MAX)
+		if (call_id > INT_MAX) {
+			rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+					 rxkad_abort_resp_bad_callid);
 			goto protocol_error_unlock;
+		}
 
-		eproto = tracepoint_string("rxkad_rsp_callctr");
-		if (call_id < conn->channels[i].call_counter)
+		if (call_id < conn->channels[i].call_counter) {
+			rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+					 rxkad_abort_resp_call_ctr);
 			goto protocol_error_unlock;
+		}
 
-		eproto = tracepoint_string("rxkad_rsp_callst");
 		if (call_id > conn->channels[i].call_counter) {
 			call = rcu_dereference_protected(
 				conn->channels[i].call,
 				lockdep_is_held(&conn->bundle->channel_lock));
-			if (call && call->state < RXRPC_CALL_COMPLETE)
+			if (call && call->state < RXRPC_CALL_COMPLETE) {
+				rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+						 rxkad_abort_resp_call_state);
 				goto protocol_error_unlock;
+			}
 			conn->channels[i].call_counter = call_id;
 		}
 	}
 	spin_unlock(&conn->bundle->channel_lock);
 
-	eproto = tracepoint_string("rxkad_rsp_seq");
-	abort_code = RXKADOUTOFSEQUENCE;
-	if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1)
+	if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
+		rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
+				 rxkad_abort_resp_ooseq);
 		goto protocol_error_free;
+	}
 
-	eproto = tracepoint_string("rxkad_rsp_level");
-	abort_code = RXKADLEVELFAIL;
 	level = ntohl(response->encrypted.level);
-	if (level > RXRPC_SECURITY_ENCRYPT)
+	if (level > RXRPC_SECURITY_ENCRYPT) {
+		rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO,
+				 rxkad_abort_resp_level);
 		goto protocol_error_free;
+	}
 	conn->security_level = level;
 
 	/* create a key to hold the security data and expiration time - after
@@ -1240,8 +1186,7 @@ protocol_error_free:
 protocol_error:
 	kfree(response);
 	key_put(server_key);
-	trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
-	return rxrpc_abort_conn(conn, skb, abort_code, -EPROTO, "RXK");
+	return -EPROTO;
 
 temporary_error_free_ticket:
 	kfree(ticket);
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index d33a109e846c..16dcabb71ebe 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -10,6 +10,8 @@
 #include <linux/slab.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
 
 MODULE_DESCRIPTION("rxperf test server (afs)");
 MODULE_AUTHOR("Red Hat, Inc.");
@@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work)
 		case -EOPNOTSUPP:
 			abort_code = RXGEN_OPCODE;
 			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
-						abort_code, ret, "GOP");
+						abort_code, ret,
+						rxperf_abort_op_not_supported);
 			goto call_complete;
 		case -ENOTSUPP:
 			abort_code = RX_USER_ABORT;
 			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
-						abort_code, ret, "GUA");
+						abort_code, ret,
+						rxperf_abort_op_not_supported);
 			goto call_complete;
 		case -EIO:
 			pr_err("Call %u in bad state %u\n",
@@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work)
 		case -ENOMEM:
 		case -EFAULT:
 			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
-						RXGEN_SS_UNMARSHAL, ret, "GUM");
+						RXGEN_SS_UNMARSHAL, ret,
+						rxperf_abort_unmarshal_error);
 			goto call_complete;
 		default:
 			rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
-						RX_CALL_DEAD, ret, "GER");
+						RX_CALL_DEAD, ret,
+						rxperf_abort_general_error);
 			goto call_complete;
 		}
 	}
@@ -523,7 +529,8 @@ static int rxperf_process_call(struct rxperf_call *call)
 
 	if (n == -ENOMEM)
 		rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
-					RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+					RXGEN_SS_MARSHAL, -ENOMEM,
+					rxperf_abort_oom);
 	return n;
 }
 
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index ab968f65a490..78af14694618 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -144,21 +144,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
 
 	sec = rxrpc_security_lookup(sp->hdr.securityIndex);
 	if (!sec) {
-		trace_rxrpc_abort(0, "SVS",
-				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-				  RX_INVALID_OPERATION, EKEYREJECTED);
-		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-		skb->priority = RX_INVALID_OPERATION;
+		rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
+				   RX_INVALID_OPERATION, -EKEYREJECTED);
 		return NULL;
 	}
 
 	if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
 	    !rx->securities) {
-		trace_rxrpc_abort(0, "SVR",
-				  sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
-				  RX_INVALID_OPERATION, EKEYREJECTED);
-		skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-		skb->priority = sec->no_key_abort;
+		rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
+				   sec->no_key_abort, -EKEYREJECTED);
 		return NULL;
 	}
 
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index dc3c2a834fc8..d67808b659f1 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -20,14 +20,15 @@
 /*
  * Propose an abort to be made in the I/O thread.
  */
-bool rxrpc_propose_abort(struct rxrpc_call *call,
-			 u32 abort_code, int error, const char *why)
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+			 enum rxrpc_abort_reason why)
 {
-	_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+	_enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
 
 	if (!call->send_abort && call->state < RXRPC_CALL_COMPLETE) {
 		call->send_abort_why = why;
 		call->send_abort_err = error;
+		call->send_abort_seq = 0;
 		/* Request abort locklessly vs rxrpc_input_call_event(). */
 		smp_store_release(&call->send_abort, abort_code);
 		rxrpc_poke_call(call, rxrpc_call_poke_abort);
@@ -683,7 +684,8 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
 		/* it's too late for this call */
 		ret = -ESHUTDOWN;
 	} else if (p.command == RXRPC_CMD_SEND_ABORT) {
-		rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED, "CMD");
+		rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
+				    rxrpc_abort_call_sendmsg);
 		ret = 0;
 	} else if (p.command != RXRPC_CMD_SEND_DATA) {
 		ret = -EINVAL;
@@ -748,7 +750,9 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
 		break;
 	default:
 		/* Request phase complete for this client call */
-		trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
+		trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send,
+				  call->cid, call->call_id, call->rx_consumed,
+				  0, -EPROTO);
 		ret = -EPROTO;
 		break;
 	}
@@ -766,17 +770,17 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
  * @call: The call to be aborted
  * @abort_code: The abort code to stick into the ABORT packet
  * @error: Local error value
- * @why: 3-char string indicating why.
+ * @why: Indication as to why.
  *
  * Allow a kernel service to abort a call, if it's still in an abortable state
  * and return true if the call was aborted, false if it was already complete.
  */
 bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
-			     u32 abort_code, int error, const char *why)
+			     u32 abort_code, int error, enum rxrpc_abort_reason why)
 {
 	bool aborted;
 
-	_enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+	_enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
 
 	mutex_lock(&call->user_mutex);
 	aborted = rxrpc_propose_abort(call, abort_code, error, why);
-- 
cgit v1.3.1


From 2953d3b8d8fd1188034c54862b74402b0b846695 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 Oct 2022 08:54:03 +0100
Subject: rxrpc: Offload the completion of service conn security to the I/O
 thread

Offload the completion of the challenge/response cycle on a service
connection to the I/O thread.  After the RESPONSE packet has been
successfully decrypted and verified by the work queue, offloading the
changing of the call states to the I/O thread makes iteration over the
conn's channel list simpler.

Do this by marking the RESPONSE skbuff and putting it onto the receive
queue for the I/O thread to collect.  We put it on the front of the queue
as we've already received the packet for it.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |  2 ++
 net/rxrpc/ar-internal.h      |  1 +
 net/rxrpc/conn_event.c       | 46 ++++++++++++++++++++++++++++++--------------
 net/rxrpc/io_thread.c        |  5 +++++
 4 files changed, 40 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index caeabd50e049..85671f4a77de 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -126,6 +126,7 @@
 #define rxrpc_skb_traces \
 	EM(rxrpc_skb_eaten_by_unshare,		"ETN unshare  ") \
 	EM(rxrpc_skb_eaten_by_unshare_nomem,	"ETN unshar-nm") \
+	EM(rxrpc_skb_get_conn_secured,		"GET conn-secd") \
 	EM(rxrpc_skb_get_conn_work,		"GET conn-work") \
 	EM(rxrpc_skb_get_local_work,		"GET locl-work") \
 	EM(rxrpc_skb_get_reject_work,		"GET rej-work ") \
@@ -135,6 +136,7 @@
 	EM(rxrpc_skb_new_error_report,		"NEW error-rpt") \
 	EM(rxrpc_skb_new_jumbo_subpacket,	"NEW jumbo-sub") \
 	EM(rxrpc_skb_new_unshared,		"NEW unshared ") \
+	EM(rxrpc_skb_put_conn_secured,		"PUT conn-secd") \
 	EM(rxrpc_skb_put_conn_work,		"PUT conn-work") \
 	EM(rxrpc_skb_put_error_report,		"PUT error-rep") \
 	EM(rxrpc_skb_put_input,			"PUT input    ") \
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e9ab06100a21..e508ec221b75 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -38,6 +38,7 @@ struct rxrpc_txbuf;
 enum rxrpc_skb_mark {
 	RXRPC_SKB_MARK_PACKET,		/* Received packet */
 	RXRPC_SKB_MARK_ERROR,		/* Error notification */
+	RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
 	RXRPC_SKB_MARK_REJECT_BUSY,	/* Reject with BUSY */
 	RXRPC_SKB_MARK_REJECT_ABORT,	/* Reject with ABORT (code in skb->priority) */
 };
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 485d7f0fed2c..b2042702ca9a 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -248,7 +248,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 			       struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-	int loop, ret;
+	int ret;
 
 	if (conn->state == RXRPC_CONN_ABORTED)
 		return -ECONNABORTED;
@@ -269,22 +269,21 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
 		if (ret < 0)
 			return ret;
 
-		spin_lock(&conn->bundle->channel_lock);
 		spin_lock(&conn->state_lock);
-
-		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+		if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
 			conn->state = RXRPC_CONN_SERVICE;
-			spin_unlock(&conn->state_lock);
-			for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
-				rxrpc_call_is_secure(
-					rcu_dereference_protected(
-						conn->channels[loop].call,
-						lockdep_is_held(&conn->bundle->channel_lock)));
-		} else {
-			spin_unlock(&conn->state_lock);
-		}
+		spin_unlock(&conn->state_lock);
 
-		spin_unlock(&conn->bundle->channel_lock);
+		if (conn->state == RXRPC_CONN_SERVICE) {
+			/* Offload call state flipping to the I/O thread.  As
+			 * we've already received the packet, put it on the
+			 * front of the queue.
+			 */
+			skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+			rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+			skb_queue_head(&conn->local->rx_queue, skb);
+			rxrpc_wake_up_io_thread(conn->local);
+		}
 		return 0;
 
 	default:
@@ -442,9 +441,28 @@ bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
  */
 void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
 {
+	unsigned int loop;
+
 	if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
 		rxrpc_abort_calls(conn);
 
+	switch (skb->mark) {
+	case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+		if (conn->state != RXRPC_CONN_SERVICE)
+			break;
+
+		spin_lock(&conn->bundle->channel_lock);
+
+		for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+			rxrpc_call_is_secure(
+				rcu_dereference_protected(
+					conn->channels[loop].call,
+					lockdep_is_held(&conn->bundle->channel_lock)));
+
+		spin_unlock(&conn->bundle->channel_lock);
+		break;
+	}
+
 	/* Process delayed ACKs whose time has come. */
 	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
 		rxrpc_process_delayed_final_acks(conn, false);
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 33fd2394c8b3..751139b3c1ac 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -451,6 +451,7 @@ int rxrpc_io_thread(void *data)
 
 		/* Process received packets and errors. */
 		if ((skb = __skb_dequeue(&rx_queue))) {
+			struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
 			switch (skb->mark) {
 			case RXRPC_SKB_MARK_PACKET:
 				skb->priority = 0;
@@ -463,6 +464,10 @@ int rxrpc_io_thread(void *data)
 				rxrpc_input_error(local, skb);
 				rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
 				break;
+			case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+				rxrpc_input_conn_event(sp->conn, skb);
+				rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+				rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
 				break;
 			default:
 				WARN_ON_ONCE(1);
-- 
cgit v1.3.1


From 1bab27af6b88b5c811f99de4812b5590f20d1cb7 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 21 Oct 2022 09:30:23 +0100
Subject: rxrpc: Set up a connection bundle from a call, not
 rxrpc_conn_parameters

Use the information now stored in struct rxrpc_call to configure the
connection bundle and thence the connection, rather than using the
rxrpc_conn_parameters struct.

Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |   3 +-
 net/rxrpc/af_rxrpc.c         |   1 -
 net/rxrpc/ar-internal.h      |   8 +--
 net/rxrpc/call_object.c      |   4 +-
 net/rxrpc/conn_client.c      | 132 ++++++++++++++++++++++---------------------
 net/rxrpc/conn_object.c      |   2 +-
 net/rxrpc/sendmsg.c          |   1 -
 7 files changed, 76 insertions(+), 75 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 85671f4a77de..e2f6b79d5517 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -178,7 +178,6 @@
 #define rxrpc_peer_traces \
 	EM(rxrpc_peer_free,			"FREE        ") \
 	EM(rxrpc_peer_get_accept,		"GET accept  ") \
-	EM(rxrpc_peer_get_activate_call,	"GET act-call") \
 	EM(rxrpc_peer_get_bundle,		"GET bundle  ") \
 	EM(rxrpc_peer_get_client_conn,		"GET cln-conn") \
 	EM(rxrpc_peer_get_input,		"GET input   ") \
@@ -191,7 +190,6 @@
 	EM(rxrpc_peer_put_bundle,		"PUT bundle  ") \
 	EM(rxrpc_peer_put_call,			"PUT call    ") \
 	EM(rxrpc_peer_put_conn,			"PUT conn    ") \
-	EM(rxrpc_peer_put_discard_tmp,		"PUT disc-tmp") \
 	EM(rxrpc_peer_put_input,		"PUT input   ") \
 	EM(rxrpc_peer_put_input_error,		"PUT inpt-err") \
 	E_(rxrpc_peer_put_keepalive,		"PUT keepaliv")
@@ -201,6 +199,7 @@
 	EM(rxrpc_bundle_get_client_call,	"GET clt-call") \
 	EM(rxrpc_bundle_get_client_conn,	"GET clt-conn") \
 	EM(rxrpc_bundle_get_service_conn,	"GET svc-conn") \
+	EM(rxrpc_bundle_put_call,		"PUT call    ") \
 	EM(rxrpc_bundle_put_conn,		"PUT conn    ") \
 	EM(rxrpc_bundle_put_discard,		"PUT discard ") \
 	E_(rxrpc_bundle_new,			"NEW         ")
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 6f6a6b77ee84..f4e1ffff2ba4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
 		mutex_unlock(&call->user_mutex);
 	}
 
-	rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
 	_leave(" = %p", call);
 	return call;
 }
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index e508ec221b75..2740c6333114 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -360,7 +360,6 @@ struct rxrpc_conn_proto {
 
 struct rxrpc_conn_parameters {
 	struct rxrpc_local	*local;		/* Representation of local endpoint */
-	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct key		*key;		/* Security details */
 	bool			exclusive;	/* T if conn is exclusive */
 	bool			upgrade;	/* T if service ID can be upgraded */
@@ -428,6 +427,7 @@ struct rxrpc_bundle {
 	struct rxrpc_local	*local;		/* Representation of local endpoint */
 	struct rxrpc_peer	*peer;		/* Remote endpoint */
 	struct key		*key;		/* Security details */
+	const struct rxrpc_security *security;	/* applied security module */
 	refcount_t		ref;
 	atomic_t		active;		/* Number of active users */
 	unsigned int		debug_id;
@@ -593,6 +593,7 @@ enum rxrpc_congest_mode {
 struct rxrpc_call {
 	struct rcu_head		rcu;
 	struct rxrpc_connection	*conn;		/* connection carrying call */
+	struct rxrpc_bundle	*bundle;	/* Connection bundle to use */
 	struct rxrpc_peer	*peer;		/* Peer record for remote address */
 	struct rxrpc_local	*local;		/* Representation of local endpoint */
 	struct rxrpc_sock __rcu	*socket;	/* socket responsible */
@@ -894,11 +895,10 @@ extern unsigned long rxrpc_conn_idle_client_fast_expiry;
 void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local);
 struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
 void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
-int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
-		       struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
-		       gfp_t);
+int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp);
 void rxrpc_expose_client_call(struct rxrpc_call *);
 void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
 void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
 void rxrpc_discard_expired_client_conns(struct work_struct *);
 void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 705f6e26cc75..835e9781afc6 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -365,7 +365,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	/* Set up or get a connection record and set the protocol parameters,
 	 * including channel number and call ID.
 	 */
-	ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
+	ret = rxrpc_connect_call(call, gfp);
 	if (ret < 0)
 		goto error_attached_to_socket;
 
@@ -663,6 +663,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
 
 	rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
 	rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+	rxrpc_deactivate_bundle(call->bundle);
+	rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call);
 	rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
 	rxrpc_put_local(call->local, rxrpc_local_put_call);
 	call_rcu(&call->rcu, rxrpc_rcu_free_call);
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 59ce5c08cf57..c0db7722571e 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -34,7 +34,10 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
 __read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
 __read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
 
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
+static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
+{
+	atomic_inc(&bundle->active);
+}
 
 /*
  * Get a connection ID and epoch for a client connection from the global pool.
@@ -109,20 +112,21 @@ void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
 /*
  * Allocate a connection bundle.
  */
-static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
 					       gfp_t gfp)
 {
 	struct rxrpc_bundle *bundle;
 
 	bundle = kzalloc(sizeof(*bundle), gfp);
 	if (bundle) {
-		bundle->local		= cp->local;
-		bundle->peer		= rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
-		bundle->key		= cp->key;
-		bundle->exclusive	= cp->exclusive;
-		bundle->upgrade		= cp->upgrade;
-		bundle->service_id	= cp->service_id;
-		bundle->security_level	= cp->security_level;
+		bundle->local		= call->local;
+		bundle->peer		= rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle);
+		bundle->key		= key_get(call->key);
+		bundle->security	= call->security;
+		bundle->exclusive	= test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+		bundle->upgrade		= test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+		bundle->service_id	= call->dest_srx.srx_service;
+		bundle->security_level	= call->security_level;
 		refcount_set(&bundle->ref, 1);
 		atomic_set(&bundle->active, 1);
 		spin_lock_init(&bundle->channel_lock);
@@ -146,19 +150,23 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
 {
 	trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
 	rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
+	key_put(bundle->key);
 	kfree(bundle);
 }
 
 void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
 {
-	unsigned int id = bundle->debug_id;
+	unsigned int id;
 	bool dead;
 	int r;
 
-	dead = __refcount_dec_and_test(&bundle->ref, &r);
-	trace_rxrpc_bundle(id, r - 1, why);
-	if (dead)
-		rxrpc_free_bundle(bundle);
+	if (bundle) {
+		id = bundle->debug_id;
+		dead = __refcount_dec_and_test(&bundle->ref, &r);
+		trace_rxrpc_bundle(id, r - 1, why);
+		if (dead)
+			rxrpc_free_bundle(bundle);
+	}
 }
 
 /*
@@ -272,20 +280,23 @@ dont_reuse:
  * Look up the conn bundle that matches the connection parameters, adding it if
  * it doesn't yet exist.
  */
-static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp,
-						 gfp_t gfp)
+static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
 {
 	static atomic_t rxrpc_bundle_id;
 	struct rxrpc_bundle *bundle, *candidate;
-	struct rxrpc_local *local = cp->local;
+	struct rxrpc_local *local = call->local;
 	struct rb_node *p, **pp, *parent;
 	long diff;
+	bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
 
 	_enter("{%px,%x,%u,%u}",
-	       cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade);
+	       call->peer, key_serial(call->key), call->security_level,
+	       upgrade);
 
-	if (cp->exclusive)
-		return rxrpc_alloc_bundle(cp, gfp);
+	if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
+		call->bundle = rxrpc_alloc_bundle(call, gfp);
+		return call->bundle;
+	}
 
 	/* First, see if the bundle is already there. */
 	_debug("search 1");
@@ -294,11 +305,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
 	while (p) {
 		bundle = rb_entry(p, struct rxrpc_bundle, local_node);
 
-#define cmp(X) ((long)bundle->X - (long)cp->X)
-		diff = (cmp(peer) ?:
-			cmp(key) ?:
-			cmp(security_level) ?:
-			cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+		diff = (cmp(bundle->peer, call->peer) ?:
+			cmp(bundle->key, call->key) ?:
+			cmp(bundle->security_level, call->security_level) ?:
+			cmp(bundle->upgrade, upgrade));
 #undef cmp
 		if (diff < 0)
 			p = p->rb_left;
@@ -311,9 +322,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
 	_debug("not found");
 
 	/* It wasn't.  We need to add one. */
-	candidate = rxrpc_alloc_bundle(cp, gfp);
+	candidate = rxrpc_alloc_bundle(call, gfp);
 	if (!candidate)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	_debug("search 2");
 	spin_lock(&local->client_bundles_lock);
@@ -323,11 +334,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
 		parent = *pp;
 		bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
 
-#define cmp(X) ((long)bundle->X - (long)cp->X)
-		diff = (cmp(peer) ?:
-			cmp(key) ?:
-			cmp(security_level) ?:
-			cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+		diff = (cmp(bundle->peer, call->peer) ?:
+			cmp(bundle->key, call->key) ?:
+			cmp(bundle->security_level, call->security_level) ?:
+			cmp(bundle->upgrade, upgrade));
 #undef cmp
 		if (diff < 0)
 			pp = &(*pp)->rb_left;
@@ -341,19 +352,19 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
 	candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
 	rb_link_node(&candidate->local_node, parent, pp);
 	rb_insert_color(&candidate->local_node, &local->client_bundles);
-	rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
+	call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
 	spin_unlock(&local->client_bundles_lock);
-	_leave(" = %u [new]", candidate->debug_id);
-	return candidate;
+	_leave(" = B=%u [new]", call->bundle->debug_id);
+	return call->bundle;
 
 found_bundle_free:
 	rxrpc_free_bundle(candidate);
 found_bundle:
-	rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
-	atomic_inc(&bundle->active);
+	call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+	rxrpc_activate_bundle(bundle);
 	spin_unlock(&local->client_bundles_lock);
-	_leave(" = %u [found]", bundle->debug_id);
-	return bundle;
+	_leave(" = B=%u [found]", call->bundle->debug_id);
+	return call->bundle;
 }
 
 /*
@@ -362,31 +373,25 @@ found_bundle:
  * If we return with a connection, the call will be on its waiting list.  It's
  * left to the caller to assign a channel and wake up the call.
  */
-static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
-					    struct rxrpc_call *call,
-					    struct rxrpc_conn_parameters *cp,
-					    struct sockaddr_rxrpc *srx,
-					    gfp_t gfp)
+static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_call *call, gfp_t gfp)
 {
 	struct rxrpc_bundle *bundle;
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
 
-	cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
-	if (!cp->peer)
+	call->peer = rxrpc_lookup_peer(call->local, &call->dest_srx, gfp);
+	if (!call->peer)
 		goto error;
 
 	call->tx_last_sent = ktime_get_real();
-	call->cong_ssthresh = cp->peer->cong_ssthresh;
+	call->cong_ssthresh = call->peer->cong_ssthresh;
 	if (call->cong_cwnd >= call->cong_ssthresh)
 		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
 	else
 		call->cong_mode = RXRPC_CALL_SLOW_START;
-	if (cp->upgrade)
-		__set_bit(RXRPC_CALL_UPGRADE, &call->flags);
 
 	/* Find the client connection bundle. */
-	bundle = rxrpc_look_up_bundle(cp, gfp);
+	bundle = rxrpc_look_up_bundle(call, gfp);
 	if (!bundle)
 		goto error;
 
@@ -449,7 +454,7 @@ static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
 			if (old)
 				trace_rxrpc_client(old, -1, rxrpc_client_replace);
 			candidate->bundle_shift = shift;
-			atomic_inc(&bundle->active);
+			rxrpc_activate_bundle(bundle);
 			bundle->conns[i] = candidate;
 			for (j = 0; j < RXRPC_MAXCALLS; j++)
 				set_bit(shift + j, &bundle->avail_chans);
@@ -541,7 +546,6 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 
 	rxrpc_see_call(call, rxrpc_call_see_activate_client);
 	list_del_init(&call->chan_wait_link);
-	call->peer	= rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
 	call->conn	= rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
 	call->cid	= conn->proto.cid | channel;
 	call->call_id	= call_id;
@@ -705,14 +709,11 @@ out:
  * find a connection for a call
  * - called in process context with IRQs enabled
  */
-int rxrpc_connect_call(struct rxrpc_sock *rx,
-		       struct rxrpc_call *call,
-		       struct rxrpc_conn_parameters *cp,
-		       struct sockaddr_rxrpc *srx,
-		       gfp_t gfp)
+int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
 {
 	struct rxrpc_bundle *bundle;
-	struct rxrpc_net *rxnet = cp->local->rxnet;
+	struct rxrpc_local *local = call->local;
+	struct rxrpc_net *rxnet = local->rxnet;
 	int ret = 0;
 
 	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
@@ -721,7 +722,7 @@ int rxrpc_connect_call(struct rxrpc_sock *rx,
 
 	rxrpc_get_call(call, rxrpc_call_get_io_thread);
 
-	bundle = rxrpc_prep_call(rx, call, cp, srx, gfp);
+	bundle = rxrpc_prep_call(call, gfp);
 	if (IS_ERR(bundle)) {
 		rxrpc_put_call(call, rxrpc_call_get_io_thread);
 		ret = PTR_ERR(bundle);
@@ -738,9 +739,6 @@ granted_channel:
 	/* Paired with the write barrier in rxrpc_activate_one_channel(). */
 	smp_rmb();
 
-out_put_bundle:
-	rxrpc_deactivate_bundle(bundle);
-	rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
 out:
 	_leave(" = %d", ret);
 	return ret;
@@ -758,7 +756,7 @@ wait_failed:
 	trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
 	rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
 	rxrpc_disconnect_client_call(bundle, call);
-	goto out_put_bundle;
+	goto out;
 }
 
 /*
@@ -945,11 +943,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
 /*
  * Drop the active count on a bundle.
  */
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
 {
-	struct rxrpc_local *local = bundle->local;
+	struct rxrpc_local *local;
 	bool need_put = false;
 
+	if (!bundle)
+		return;
+
+	local = bundle->local;
 	if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
 		if (!bundle->exclusive) {
 			_debug("erase bundle");
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 2e3f0a222e1b..2a7d5378300c 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -208,7 +208,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 	}
 
 	if (rxrpc_is_client_call(call)) {
-		rxrpc_disconnect_client_call(conn->bundle, call);
+		rxrpc_disconnect_client_call(call->bundle, call);
 	} else {
 		spin_lock(&conn->bundle->channel_lock);
 		__rxrpc_disconnect_call(conn, call);
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index d67808b659f1..2a003c3a9897 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -564,7 +564,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
 				     atomic_inc_return(&rxrpc_debug_id));
 	/* The socket is now unlocked */
 
-	rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
 	_leave(" = %p\n", call);
 	return call;
 }
-- 
cgit v1.3.1


From 9d35d880e0e4a3ab32d8c12f9e4d76198aadd42d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Wed, 19 Oct 2022 09:45:43 +0100
Subject: rxrpc: Move client call connection to the I/O thread

Move the connection setup of client calls to the I/O thread so that a whole
load of locking and barrierage can be eliminated.  This necessitates the
app thread waiting for connection to complete before it can begin
encrypting data.

This also completes the fix for a race that exists between call connection
and call disconnection whereby the data transmission code adds the call to
the peer error distribution list after the call has been disconnected (say
by the rxrpc socket getting closed).

The fix is to complete the process of moving call connection, data
transmission and call disconnection into the I/O thread and thus forcibly
serialising them.

Note that the issue may predate the overhaul to an I/O thread model that
were included in the merge window for v6.2, but the timing is very much
changed by the change given below.

Fixes: cf37b5987508 ("rxrpc: Move DATA transmission into call processor work item")
Reported-by: syzbot+c22650d2844392afdcfd@syzkaller.appspotmail.com
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Marc Dionne <marc.dionne@auristor.com>
cc: linux-afs@lists.infradead.org
---
 include/trace/events/rxrpc.h |   5 +-
 net/rxrpc/ar-internal.h      |  22 +-
 net/rxrpc/call_object.c      |  58 +++--
 net/rxrpc/call_state.c       |   2 +-
 net/rxrpc/conn_client.c      | 533 ++++++++++---------------------------------
 net/rxrpc/conn_event.c       |  49 +---
 net/rxrpc/conn_object.c      |  19 +-
 net/rxrpc/conn_service.c     |   1 -
 net/rxrpc/io_thread.c        |  13 +-
 net/rxrpc/local_object.c     |   6 +-
 net/rxrpc/proc.c             |   1 +
 net/rxrpc/rxkad.c            |  21 +-
 net/rxrpc/security.c         |  33 ++-
 net/rxrpc/sendmsg.c          |  64 ++++++
 14 files changed, 297 insertions(+), 530 deletions(-)

(limited to 'include')

diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index e2f6b79d5517..283db0ea3db4 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -218,7 +218,6 @@
 	EM(rxrpc_conn_put_call,			"PUT call    ") \
 	EM(rxrpc_conn_put_call_input,		"PUT inp-call") \
 	EM(rxrpc_conn_put_conn_input,		"PUT inp-conn") \
-	EM(rxrpc_conn_put_discard,		"PUT discard ") \
 	EM(rxrpc_conn_put_discard_idle,		"PUT disc-idl") \
 	EM(rxrpc_conn_put_local_dead,		"PUT loc-dead") \
 	EM(rxrpc_conn_put_noreuse,		"PUT noreuse ") \
@@ -240,12 +239,11 @@
 	EM(rxrpc_client_chan_activate,		"ChActv") \
 	EM(rxrpc_client_chan_disconnect,	"ChDisc") \
 	EM(rxrpc_client_chan_pass,		"ChPass") \
-	EM(rxrpc_client_chan_wait_failed,	"ChWtFl") \
 	EM(rxrpc_client_cleanup,		"Clean ") \
 	EM(rxrpc_client_discard,		"Discar") \
-	EM(rxrpc_client_duplicate,		"Duplic") \
 	EM(rxrpc_client_exposed,		"Expose") \
 	EM(rxrpc_client_replace,		"Replac") \
+	EM(rxrpc_client_queue_new_call,		"Q-Call") \
 	EM(rxrpc_client_to_active,		"->Actv") \
 	E_(rxrpc_client_to_idle,		"->Idle")
 
@@ -273,6 +271,7 @@
 	EM(rxrpc_call_put_sendmsg,		"PUT sendmsg ") \
 	EM(rxrpc_call_put_unnotify,		"PUT unnotify") \
 	EM(rxrpc_call_put_userid_exists,	"PUT u-exists") \
+	EM(rxrpc_call_put_userid,		"PUT user-id ") \
 	EM(rxrpc_call_see_accept,		"SEE accept  ") \
 	EM(rxrpc_call_see_activate_client,	"SEE act-clnt") \
 	EM(rxrpc_call_see_connect_failed,	"SEE con-fail") \
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index de84061a5447..007258538bee 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -292,7 +292,6 @@ struct rxrpc_local {
 	struct rb_root		client_bundles;	/* Client connection bundles by socket params */
 	spinlock_t		client_bundles_lock; /* Lock for client_bundles */
 	bool			kill_all_client_conns;
-	spinlock_t		client_conn_cache_lock; /* Lock for ->*_client_conns */
 	struct list_head	idle_client_conns;
 	struct timer_list	client_conn_reap_timer;
 	unsigned long		client_conn_flags;
@@ -304,7 +303,8 @@ struct rxrpc_local {
 	bool			dead;
 	bool			service_closed;	/* Service socket closed */
 	struct idr		conn_ids;	/* List of connection IDs */
-	spinlock_t		conn_lock;	/* Lock for client connection pool */
+	struct list_head	new_client_calls; /* Newly created client calls need connection */
+	spinlock_t		client_call_lock; /* Lock for ->new_client_calls */
 	struct sockaddr_rxrpc	srx;		/* local address */
 };
 
@@ -385,7 +385,6 @@ enum rxrpc_call_completion {
  * Bits in the connection flags.
  */
 enum rxrpc_conn_flag {
-	RXRPC_CONN_HAS_IDR,		/* Has a client conn ID assigned */
 	RXRPC_CONN_IN_SERVICE_CONNS,	/* Conn is in peer->service_conns */
 	RXRPC_CONN_DONT_REUSE,		/* Don't reuse this connection */
 	RXRPC_CONN_PROBING_FOR_UPGRADE,	/* Probing for service upgrade */
@@ -413,6 +412,7 @@ enum rxrpc_conn_event {
  */
 enum rxrpc_conn_proto_state {
 	RXRPC_CONN_UNUSED,		/* Connection not yet attempted */
+	RXRPC_CONN_CLIENT_UNSECURED,	/* Client connection needs security init */
 	RXRPC_CONN_CLIENT,		/* Client connection */
 	RXRPC_CONN_SERVICE_PREALLOC,	/* Service connection preallocation */
 	RXRPC_CONN_SERVICE_UNSECURED,	/* Service unsecured connection */
@@ -436,11 +436,9 @@ struct rxrpc_bundle {
 	u32			security_level;	/* Security level selected */
 	u16			service_id;	/* Service ID for this connection */
 	bool			try_upgrade;	/* True if the bundle is attempting upgrade */
-	bool			alloc_conn;	/* True if someone's getting a conn */
 	bool			exclusive;	/* T if conn is exclusive */
 	bool			upgrade;	/* T if service ID can be upgraded */
-	short			alloc_error;	/* Error from last conn allocation */
-	spinlock_t		channel_lock;
+	unsigned short		alloc_error;	/* Error from last conn allocation */
 	struct rb_node		local_node;	/* Node in local->client_conns */
 	struct list_head	waiting_calls;	/* Calls waiting for channels */
 	unsigned long		avail_chans;	/* Mask of available channels */
@@ -468,7 +466,7 @@ struct rxrpc_connection {
 	unsigned char		act_chans;	/* Mask of active channels */
 	struct rxrpc_channel {
 		unsigned long		final_ack_at;	/* Time at which to issue final ACK */
-		struct rxrpc_call __rcu	*call;		/* Active call */
+		struct rxrpc_call	*call;		/* Active call */
 		unsigned int		call_debug_id;	/* call->debug_id */
 		u32			call_id;	/* ID of current call */
 		u32			call_counter;	/* Call ID counter */
@@ -489,6 +487,7 @@ struct rxrpc_connection {
 	struct list_head	link;		/* link in master connection list */
 	struct sk_buff_head	rx_queue;	/* received conn-level packets */
 
+	struct mutex		security_lock;	/* Lock for security management */
 	const struct rxrpc_security *security;	/* applied security module */
 	union {
 		struct {
@@ -619,7 +618,7 @@ struct rxrpc_call {
 	struct work_struct	destroyer;	/* In-process-context destroyer */
 	rxrpc_notify_rx_t	notify_rx;	/* kernel service Rx notification function */
 	struct list_head	link;		/* link in master call list */
-	struct list_head	chan_wait_link;	/* Link in conn->bundle->waiting_calls */
+	struct list_head	wait_link;	/* Link in local->new_client_calls */
 	struct hlist_node	error_link;	/* link in error distribution list */
 	struct list_head	accept_link;	/* Link in rx->acceptq */
 	struct list_head	recvmsg_link;	/* Link in rx->recvmsg_q */
@@ -866,6 +865,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
 					 struct sockaddr_rxrpc *,
 					 struct rxrpc_call_params *, gfp_t,
 					 unsigned int);
+void rxrpc_start_call_timer(struct rxrpc_call *call);
 void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
 			 struct sk_buff *);
 void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
@@ -905,6 +905,7 @@ static inline void rxrpc_set_call_state(struct rxrpc_call *call,
 {
 	/* Order write of completion info before write of ->state. */
 	smp_store_release(&call->_state, state);
+	wake_up(&call->waitq);
 }
 
 static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call)
@@ -940,10 +941,11 @@ extern unsigned int rxrpc_reap_client_connections;
 extern unsigned long rxrpc_conn_idle_client_expiry;
 extern unsigned long rxrpc_conn_idle_client_fast_expiry;
 
-void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local);
+void rxrpc_purge_client_connections(struct rxrpc_local *local);
 struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
 void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
-int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp);
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp);
+void rxrpc_connect_client_calls(struct rxrpc_local *local);
 void rxrpc_expose_client_call(struct rxrpc_call *);
 void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
 void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index c94161acf3c4..3ded5a24627c 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -150,7 +150,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
 	timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
 	INIT_WORK(&call->destroyer, rxrpc_destroy_call);
 	INIT_LIST_HEAD(&call->link);
-	INIT_LIST_HEAD(&call->chan_wait_link);
+	INIT_LIST_HEAD(&call->wait_link);
 	INIT_LIST_HEAD(&call->accept_link);
 	INIT_LIST_HEAD(&call->recvmsg_link);
 	INIT_LIST_HEAD(&call->sock_link);
@@ -242,7 +242,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
 /*
  * Initiate the call ack/resend/expiry timer.
  */
-static void rxrpc_start_call_timer(struct rxrpc_call *call)
+void rxrpc_start_call_timer(struct rxrpc_call *call)
 {
 	unsigned long now = jiffies;
 	unsigned long j = now + MAX_JIFFY_OFFSET;
@@ -286,6 +286,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call)
 	up(limiter);
 }
 
+/*
+ * Start the process of connecting a call.  We obtain a peer and a connection
+ * bundle, but the actual association of a call with a connection is offloaded
+ * to the I/O thread to simplify locking.
+ */
+static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+{
+	struct rxrpc_local *local = call->local;
+	int ret = 0;
+
+	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+	call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
+	if (!call->peer)
+		goto error;
+
+	ret = rxrpc_look_up_bundle(call, gfp);
+	if (ret < 0)
+		goto error;
+
+	trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
+	rxrpc_get_call(call, rxrpc_call_get_io_thread);
+	spin_lock(&local->client_call_lock);
+	list_add_tail(&call->wait_link, &local->new_client_calls);
+	spin_unlock(&local->client_call_lock);
+	rxrpc_wake_up_io_thread(local);
+	return 0;
+
+error:
+	__set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+	return ret;
+}
+
 /*
  * Set up a call for the given parameters.
  * - Called with the socket lock held, which it must release.
@@ -369,10 +402,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
 	if (ret < 0)
 		goto error_attached_to_socket;
 
-	rxrpc_see_call(call, rxrpc_call_see_connected);
-
-	rxrpc_start_call_timer(call);
-
 	_leave(" = %p [new]", call);
 	return call;
 
@@ -387,22 +416,20 @@ error_dup_user_ID:
 	rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST);
 	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
 			 rxrpc_call_see_userid_exists);
-	rxrpc_release_call(rx, call);
 	mutex_unlock(&call->user_mutex);
 	rxrpc_put_call(call, rxrpc_call_put_userid_exists);
 	_leave(" = -EEXIST");
 	return ERR_PTR(-EEXIST);
 
 	/* We got an error, but the call is attached to the socket and is in
-	 * need of release.  However, we might now race with recvmsg() when
-	 * completing the call queues it.  Return 0 from sys_sendmsg() and
+	 * need of release.  However, we might now race with recvmsg() when it
+	 * completion notifies the socket.  Return 0 from sys_sendmsg() and
 	 * leave the error to recvmsg() to deal with.
 	 */
 error_attached_to_socket:
 	trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
 			 rxrpc_call_see_connect_failed);
-	set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
-	rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
+	rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
 	_leave(" = c=%08x [err]", call->debug_id);
 	return call;
 }
@@ -460,7 +487,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
 	chan = sp->hdr.cid & RXRPC_CHANNELMASK;
 	conn->channels[chan].call_counter = call->call_id;
 	conn->channels[chan].call_id = call->call_id;
-	rcu_assign_pointer(conn->channels[chan].call, call);
+	conn->channels[chan].call = call;
 	spin_unlock(&conn->state_lock);
 
 	spin_lock(&conn->peer->lock);
@@ -520,7 +547,7 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
 void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 {
 	struct rxrpc_connection *conn = call->conn;
-	bool put = false;
+	bool put = false, putu = false;
 
 	_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
 
@@ -555,7 +582,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 	if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
 		rb_erase(&call->sock_node, &rx->calls);
 		memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
-		rxrpc_put_call(call, rxrpc_call_put_userid_exists);
+		putu = true;
 	}
 
 	list_del(&call->sock_link);
@@ -563,6 +590,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
 
 	_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
 
+	if (putu)
+		rxrpc_put_call(call, rxrpc_call_put_userid);
+
 	_leave("");
 }
 
diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c
index 27dc1242b712..6afb54373ebb 100644
--- a/net/rxrpc/call_state.c
+++ b/net/rxrpc/call_state.c
@@ -65,5 +65,5 @@ void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion comp
 	call->completion	= compl;
 	call->_state		= RXRPC_CALL_COMPLETE;
 	trace_rxrpc_call_complete(call);
-	__set_bit(RXRPC_CALL_RELEASED, &call->flags);
+	WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags));
 }
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index ebb43f65ebc5..981ca5b98bcb 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -39,61 +39,19 @@ static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
 	atomic_inc(&bundle->active);
 }
 
-/*
- * Get a connection ID and epoch for a client connection from the global pool.
- * The connection struct pointer is then recorded in the idr radix tree.  The
- * epoch doesn't change until the client is rebooted (or, at least, unless the
- * module is unloaded).
- */
-static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
-					  gfp_t gfp)
-{
-	struct rxrpc_local *local = conn->local;
-	int id;
-
-	_enter("");
-
-	idr_preload(gfp);
-	spin_lock(&local->conn_lock);
-
-	id = idr_alloc_cyclic(&local->conn_ids, conn,
-			      1, 0x40000000, GFP_NOWAIT);
-	if (id < 0)
-		goto error;
-
-	spin_unlock(&local->conn_lock);
-	idr_preload_end();
-
-	conn->proto.epoch = local->rxnet->epoch;
-	conn->proto.cid = id << RXRPC_CIDSHIFT;
-	set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
-	_leave(" [CID %x]", conn->proto.cid);
-	return 0;
-
-error:
-	spin_unlock(&local->conn_lock);
-	idr_preload_end();
-	_leave(" = %d", id);
-	return id;
-}
-
 /*
  * Release a connection ID for a client connection.
  */
 static void rxrpc_put_client_connection_id(struct rxrpc_local *local,
 					   struct rxrpc_connection *conn)
 {
-	if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
-		spin_lock(&local->conn_lock);
-		idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
-		spin_unlock(&local->conn_lock);
-	}
+	idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
 }
 
 /*
  * Destroy the client connection ID tree.
  */
-void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
+static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
 {
 	struct rxrpc_connection *conn;
 	int id;
@@ -129,7 +87,6 @@ static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
 		bundle->security_level	= call->security_level;
 		refcount_set(&bundle->ref, 1);
 		atomic_set(&bundle->active, 1);
-		spin_lock_init(&bundle->channel_lock);
 		INIT_LIST_HEAD(&bundle->waiting_calls);
 		trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
 	}
@@ -169,69 +126,68 @@ void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
 	}
 }
 
+/*
+ * Get rid of outstanding client connection preallocations when a local
+ * endpoint is destroyed.
+ */
+void rxrpc_purge_client_connections(struct rxrpc_local *local)
+{
+	rxrpc_destroy_client_conn_ids(local);
+}
+
 /*
  * Allocate a client connection.
  */
 static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle)
 {
 	struct rxrpc_connection *conn;
-	struct rxrpc_net *rxnet = bundle->local->rxnet;
-	int ret;
+	struct rxrpc_local *local = bundle->local;
+	struct rxrpc_net *rxnet = local->rxnet;
+	int id;
 
 	_enter("");
 
-	conn = rxrpc_alloc_connection(rxnet, gfp);
-	if (!conn) {
-		_leave(" = -ENOMEM");
+	conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN);
+	if (!conn)
 		return ERR_PTR(-ENOMEM);
+
+	id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000,
+			      GFP_ATOMIC | __GFP_NOWARN);
+	if (id < 0) {
+		kfree(conn);
+		return ERR_PTR(id);
 	}
 
 	refcount_set(&conn->ref, 1);
-	conn->bundle		= bundle;
-	conn->local		= bundle->local;
-	conn->peer		= bundle->peer;
-	conn->key		= bundle->key;
+	conn->proto.cid		= id << RXRPC_CIDSHIFT;
+	conn->proto.epoch	= local->rxnet->epoch;
+	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
+	conn->bundle		= rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+	conn->local		= rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn);
+	conn->peer		= rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn);
+	conn->key		= key_get(bundle->key);
+	conn->security		= bundle->security;
 	conn->exclusive		= bundle->exclusive;
 	conn->upgrade		= bundle->upgrade;
 	conn->orig_service_id	= bundle->service_id;
 	conn->security_level	= bundle->security_level;
-	conn->out_clientflag	= RXRPC_CLIENT_INITIATED;
-	conn->state		= RXRPC_CONN_CLIENT;
+	conn->state		= RXRPC_CONN_CLIENT_UNSECURED;
 	conn->service_id	= conn->orig_service_id;
 
-	ret = rxrpc_get_client_connection_id(conn, gfp);
-	if (ret < 0)
-		goto error_0;
-
-	ret = rxrpc_init_client_conn_security(conn);
-	if (ret < 0)
-		goto error_1;
+	if (conn->security == &rxrpc_no_security)
+		conn->state	= RXRPC_CONN_CLIENT;
 
 	atomic_inc(&rxnet->nr_conns);
 	write_lock(&rxnet->conn_lock);
 	list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
 	write_unlock(&rxnet->conn_lock);
 
-	rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
-	rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
-	rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
-	key_get(conn->key);
-
-	trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
-			 rxrpc_conn_new_client);
+	rxrpc_see_connection(conn, rxrpc_conn_new_client);
 
 	atomic_inc(&rxnet->nr_client_conns);
 	trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
-	_leave(" = %p", conn);
 	return conn;
-
-error_1:
-	rxrpc_put_client_connection_id(bundle->local, conn);
-error_0:
-	kfree(conn);
-	_leave(" = %d", ret);
-	return ERR_PTR(ret);
 }
 
 /*
@@ -249,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
 	if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
 		goto dont_reuse;
 
-	if (conn->state != RXRPC_CONN_CLIENT ||
+	if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED &&
+	     conn->state != RXRPC_CONN_CLIENT) ||
 	    conn->proto.epoch != rxnet->epoch)
 		goto mark_dont_reuse;
 
@@ -280,7 +237,7 @@ dont_reuse:
  * Look up the conn bundle that matches the connection parameters, adding it if
  * it doesn't yet exist.
  */
-static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
 {
 	static atomic_t rxrpc_bundle_id;
 	struct rxrpc_bundle *bundle, *candidate;
@@ -295,7 +252,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t
 
 	if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
 		call->bundle = rxrpc_alloc_bundle(call, gfp);
-		return call->bundle;
+		return call->bundle ? 0 : -ENOMEM;
 	}
 
 	/* First, see if the bundle is already there. */
@@ -324,7 +281,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t
 	/* It wasn't.  We need to add one. */
 	candidate = rxrpc_alloc_bundle(call, gfp);
 	if (!candidate)
-		return ERR_PTR(-ENOMEM);
+		return -ENOMEM;
 
 	_debug("search 2");
 	spin_lock(&local->client_bundles_lock);
@@ -355,7 +312,7 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t
 	call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
 	spin_unlock(&local->client_bundles_lock);
 	_leave(" = B=%u [new]", call->bundle->debug_id);
-	return call->bundle;
+	return 0;
 
 found_bundle_free:
 	rxrpc_free_bundle(candidate);
@@ -364,160 +321,77 @@ found_bundle:
 	rxrpc_activate_bundle(bundle);
 	spin_unlock(&local->client_bundles_lock);
 	_leave(" = B=%u [found]", call->bundle->debug_id);
-	return call->bundle;
-}
-
-/*
- * Create or find a client bundle to use for a call.
- *
- * If we return with a connection, the call will be on its waiting list.  It's
- * left to the caller to assign a channel and wake up the call.
- */
-static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_call *call, gfp_t gfp)
-{
-	struct rxrpc_bundle *bundle;
-
-	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
-	call->peer = rxrpc_lookup_peer(call->local, &call->dest_srx, gfp);
-	if (!call->peer)
-		goto error;
-
-	call->tx_last_sent = ktime_get_real();
-	call->cong_ssthresh = call->peer->cong_ssthresh;
-	if (call->cong_cwnd >= call->cong_ssthresh)
-		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
-	else
-		call->cong_mode = RXRPC_CALL_SLOW_START;
-
-	/* Find the client connection bundle. */
-	bundle = rxrpc_look_up_bundle(call, gfp);
-	if (!bundle)
-		goto error;
-
-	/* Get this call queued.  Someone else may activate it whilst we're
-	 * lining up a new connection, but that's fine.
-	 */
-	spin_lock(&bundle->channel_lock);
-	list_add_tail(&call->chan_wait_link, &bundle->waiting_calls);
-	spin_unlock(&bundle->channel_lock);
-
-	_leave(" = [B=%x]", bundle->debug_id);
-	return bundle;
-
-error:
-	_leave(" = -ENOMEM");
-	return ERR_PTR(-ENOMEM);
+	return 0;
 }
 
 /*
  * Allocate a new connection and add it into a bundle.
  */
-static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
-	__releases(bundle->channel_lock)
+static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
+				     unsigned int slot)
 {
-	struct rxrpc_connection *candidate = NULL, *old = NULL;
-	bool conflict;
-	int i;
-
-	_enter("");
-
-	conflict = bundle->alloc_conn;
-	if (!conflict)
-		bundle->alloc_conn = true;
-	spin_unlock(&bundle->channel_lock);
-	if (conflict) {
-		_leave(" [conf]");
-		return;
-	}
-
-	candidate = rxrpc_alloc_client_connection(bundle, gfp);
-
-	spin_lock(&bundle->channel_lock);
-	bundle->alloc_conn = false;
+	struct rxrpc_connection *conn, *old;
+	unsigned int shift = slot * RXRPC_MAXCALLS;
+	unsigned int i;
 
-	if (IS_ERR(candidate)) {
-		bundle->alloc_error = PTR_ERR(candidate);
-		spin_unlock(&bundle->channel_lock);
-		_leave(" [err %ld]", PTR_ERR(candidate));
-		return;
+	old = bundle->conns[slot];
+	if (old) {
+		bundle->conns[slot] = NULL;
+		trace_rxrpc_client(old, -1, rxrpc_client_replace);
+		rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
 	}
 
-	bundle->alloc_error = 0;
-
-	for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
-		unsigned int shift = i * RXRPC_MAXCALLS;
-		int j;
-
-		old = bundle->conns[i];
-		if (!rxrpc_may_reuse_conn(old)) {
-			if (old)
-				trace_rxrpc_client(old, -1, rxrpc_client_replace);
-			candidate->bundle_shift = shift;
-			rxrpc_activate_bundle(bundle);
-			bundle->conns[i] = candidate;
-			for (j = 0; j < RXRPC_MAXCALLS; j++)
-				set_bit(shift + j, &bundle->avail_chans);
-			candidate = NULL;
-			break;
-		}
-
-		old = NULL;
+	conn = rxrpc_alloc_client_connection(bundle);
+	if (IS_ERR(conn)) {
+		bundle->alloc_error = PTR_ERR(conn);
+		return false;
 	}
 
-	spin_unlock(&bundle->channel_lock);
-
-	if (candidate) {
-		_debug("discard C=%x", candidate->debug_id);
-		trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
-		rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
-	}
-
-	rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
-	_leave("");
+	rxrpc_activate_bundle(bundle);
+	conn->bundle_shift = shift;
+	bundle->conns[slot] = conn;
+	for (i = 0; i < RXRPC_MAXCALLS; i++)
+		set_bit(shift + i, &bundle->avail_chans);
+	return true;
 }
 
 /*
  * Add a connection to a bundle if there are no usable connections or we have
  * connections waiting for extra capacity.
  */
-static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp)
+static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle)
 {
-	struct rxrpc_call *call;
-	int i, usable;
+	int slot = -1, i, usable;
 
 	_enter("");
 
-	spin_lock(&bundle->channel_lock);
+	bundle->alloc_error = 0;
 
 	/* See if there are any usable connections. */
 	usable = 0;
-	for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
+	for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
 		if (rxrpc_may_reuse_conn(bundle->conns[i]))
 			usable++;
-
-	if (!usable && !list_empty(&bundle->waiting_calls)) {
-		call = list_first_entry(&bundle->waiting_calls,
-					struct rxrpc_call, chan_wait_link);
-		if (test_bit(RXRPC_CALL_UPGRADE, &call->flags))
-			bundle->try_upgrade = true;
+		else if (slot == -1)
+			slot = i;
 	}
 
+	if (!usable && bundle->upgrade)
+		bundle->try_upgrade = true;
+
 	if (!usable)
 		goto alloc_conn;
 
 	if (!bundle->avail_chans &&
 	    !bundle->try_upgrade &&
-	    !list_empty(&bundle->waiting_calls) &&
 	    usable < ARRAY_SIZE(bundle->conns))
 		goto alloc_conn;
 
-	spin_unlock(&bundle->channel_lock);
 	_leave("");
-	return;
+	return usable;
 
 alloc_conn:
-	return rxrpc_add_conn_to_bundle(bundle, gfp);
+	return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false;
 }
 
 /*
@@ -531,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	struct rxrpc_channel *chan = &conn->channels[channel];
 	struct rxrpc_bundle *bundle = conn->bundle;
 	struct rxrpc_call *call = list_entry(bundle->waiting_calls.next,
-					     struct rxrpc_call, chan_wait_link);
+					     struct rxrpc_call, wait_link);
 	u32 call_id = chan->call_counter + 1;
 
 	_enter("C=%x,%u", conn->debug_id, channel);
 
+	list_del_init(&call->wait_link);
+
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
 
 	/* Cancel the final ACK on the previous call if it hasn't been sent yet
@@ -545,65 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
 	clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
 
 	rxrpc_see_call(call, rxrpc_call_see_activate_client);
-	list_del_init(&call->chan_wait_link);
 	call->conn	= rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
 	call->cid	= conn->proto.cid | channel;
 	call->call_id	= call_id;
 	call->dest_srx.srx_service = conn->service_id;
-
-	trace_rxrpc_connect_call(call);
-
-	rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
-
-	/* Paired with the read barrier in rxrpc_connect_call().  This orders
-	 * cid and epoch in the connection wrt to call_id without the need to
-	 * take the channel_lock.
-	 *
-	 * We provisionally assign a callNumber at this point, but we don't
-	 * confirm it until the call is about to be exposed.
-	 *
-	 * TODO: Pair with a barrier in the data_ready handler when that looks
-	 * at the call ID through a connection channel.
-	 */
-	smp_wmb();
+	call->cong_ssthresh = call->peer->cong_ssthresh;
+	if (call->cong_cwnd >= call->cong_ssthresh)
+		call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+	else
+		call->cong_mode = RXRPC_CALL_SLOW_START;
 
 	chan->call_id		= call_id;
 	chan->call_debug_id	= call->debug_id;
-	rcu_assign_pointer(chan->call, call);
+	chan->call		= call;
+
+	rxrpc_see_call(call, rxrpc_call_see_connected);
+	trace_rxrpc_connect_call(call);
+	call->tx_last_sent = ktime_get_real();
+	rxrpc_start_call_timer(call);
+	rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
 	wake_up(&call->waitq);
 }
 
 /*
  * Remove a connection from the idle list if it's on it.
  */
-static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
+static void rxrpc_unidle_conn(struct rxrpc_connection *conn)
 {
-	struct rxrpc_local *local = bundle->local;
-	bool drop_ref;
-
 	if (!list_empty(&conn->cache_link)) {
-		drop_ref = false;
-		spin_lock(&local->client_conn_cache_lock);
-		if (!list_empty(&conn->cache_link)) {
-			list_del_init(&conn->cache_link);
-			drop_ref = true;
-		}
-		spin_unlock(&local->client_conn_cache_lock);
-		if (drop_ref)
-			rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
+		list_del_init(&conn->cache_link);
+		rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
 	}
 }
 
 /*
- * Assign channels and callNumbers to waiting calls with channel_lock
- * held by caller.
+ * Assign channels and callNumbers to waiting calls.
  */
-static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
+static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
 {
 	struct rxrpc_connection *conn;
 	unsigned long avail, mask;
 	unsigned int channel, slot;
 
+	trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
+
 	if (bundle->try_upgrade)
 		mask = 1;
 	else
@@ -623,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
 
 		if (bundle->try_upgrade)
 			set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
-		rxrpc_unidle_conn(bundle, conn);
+		rxrpc_unidle_conn(conn);
 
 		channel &= (RXRPC_MAXCALLS - 1);
 		conn->act_chans	|= 1 << channel;
@@ -632,125 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
 }
 
 /*
- * Assign channels and callNumbers to waiting calls.
- */
-static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
-{
-	_enter("B=%x", bundle->debug_id);
-
-	trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
-
-	if (!bundle->avail_chans)
-		return;
-
-	spin_lock(&bundle->channel_lock);
-	rxrpc_activate_channels_locked(bundle);
-	spin_unlock(&bundle->channel_lock);
-	_leave("");
-}
-
-/*
- * Wait for a callNumber and a channel to be granted to a call.
- */
-static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle,
-				  struct rxrpc_call *call, gfp_t gfp)
-{
-	DECLARE_WAITQUEUE(myself, current);
-	int ret = 0;
-
-	_enter("%d", call->debug_id);
-
-	if (!gfpflags_allow_blocking(gfp)) {
-		rxrpc_maybe_add_conn(bundle, gfp);
-		rxrpc_activate_channels(bundle);
-		ret = bundle->alloc_error ?: -EAGAIN;
-		goto out;
-	}
-
-	add_wait_queue_exclusive(&call->waitq, &myself);
-	for (;;) {
-		rxrpc_maybe_add_conn(bundle, gfp);
-		rxrpc_activate_channels(bundle);
-		ret = bundle->alloc_error;
-		if (ret < 0)
-			break;
-
-		switch (call->interruptibility) {
-		case RXRPC_INTERRUPTIBLE:
-		case RXRPC_PREINTERRUPTIBLE:
-			set_current_state(TASK_INTERRUPTIBLE);
-			break;
-		case RXRPC_UNINTERRUPTIBLE:
-		default:
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			break;
-		}
-		if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
-			break;
-		if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
-		     call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
-		    signal_pending(current)) {
-			ret = -ERESTARTSYS;
-			break;
-		}
-		schedule();
-	}
-	remove_wait_queue(&call->waitq, &myself);
-	__set_current_state(TASK_RUNNING);
-
-out:
-	_leave(" = %d", ret);
-	return ret;
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Connect waiting channels (called from the I/O thread).
  */
-int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+void rxrpc_connect_client_calls(struct rxrpc_local *local)
 {
-	struct rxrpc_bundle *bundle;
-	int ret = 0;
-
-	_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
-	rxrpc_get_call(call, rxrpc_call_get_io_thread);
-
-	bundle = rxrpc_prep_call(call, gfp);
-	if (IS_ERR(bundle)) {
-		rxrpc_put_call(call, rxrpc_call_get_io_thread);
-		ret = PTR_ERR(bundle);
-		goto out;
-	}
-
-	if (rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_CONN) {
-		ret = rxrpc_wait_for_channel(bundle, call, gfp);
-		if (ret < 0)
-			goto wait_failed;
-	}
-
-granted_channel:
-	/* Paired with the write barrier in rxrpc_activate_one_channel(). */
-	smp_rmb();
+	struct rxrpc_call *call;
 
-out:
-	_leave(" = %d", ret);
-	return ret;
+	while ((call = list_first_entry_or_null(&local->new_client_calls,
+						struct rxrpc_call, wait_link))
+	       ) {
+		struct rxrpc_bundle *bundle = call->bundle;
 
-wait_failed:
-	spin_lock(&bundle->channel_lock);
-	list_del_init(&call->chan_wait_link);
-	spin_unlock(&bundle->channel_lock);
+		spin_lock(&local->client_call_lock);
+		list_move_tail(&call->wait_link, &bundle->waiting_calls);
+		spin_unlock(&local->client_call_lock);
 
-	if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) {
-		ret = 0;
-		goto granted_channel;
+		if (rxrpc_bundle_has_space(bundle))
+			rxrpc_activate_channels(bundle);
 	}
-
-	trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
-	rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
-	rxrpc_disconnect_client_call(bundle, call);
-	goto out;
 }
 
 /*
@@ -808,8 +568,6 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 
 	_enter("c=%x", call->debug_id);
 
-	spin_lock(&bundle->channel_lock);
-
 	/* Calls that have never actually been assigned a channel can simply be
 	 * discarded.
 	 */
@@ -818,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 		_debug("call is waiting");
 		ASSERTCMP(call->call_id, ==, 0);
 		ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
-		list_del_init(&call->chan_wait_link);
-		goto out;
+		list_del_init(&call->wait_link);
+		return;
 	}
 
 	cid = call->cid;
@@ -827,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 	chan = &conn->channels[channel];
 	trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
 
-	if (rcu_access_pointer(chan->call) != call) {
-		spin_unlock(&bundle->channel_lock);
-		BUG();
-	}
+	if (WARN_ON(chan->call != call))
+		return;
 
 	may_reuse = rxrpc_may_reuse_conn(conn);
 
@@ -851,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 			trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
 			bundle->try_upgrade = false;
 			if (may_reuse)
-				rxrpc_activate_channels_locked(bundle);
+				rxrpc_activate_channels(bundle);
 		}
-
 	}
 
 	/* See if we can pass the channel directly to another call. */
 	if (may_reuse && !list_empty(&bundle->waiting_calls)) {
 		trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
 		rxrpc_activate_one_channel(conn, channel);
-		goto out;
+		return;
 	}
 
 	/* Schedule the final ACK to be transmitted in a short while so that it
@@ -878,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 	}
 
 	/* Deactivate the channel. */
-	rcu_assign_pointer(chan->call, NULL);
+	chan->call = NULL;
 	set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans);
 	conn->act_chans	&= ~(1 << channel);
 
@@ -891,15 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
 		conn->idle_timestamp = jiffies;
 
 		rxrpc_get_connection(conn, rxrpc_conn_get_idle);
-		spin_lock(&local->client_conn_cache_lock);
 		list_move_tail(&conn->cache_link, &local->idle_client_conns);
-		spin_unlock(&local->client_conn_cache_lock);
 
 		rxrpc_set_client_reap_timer(local);
 	}
-
-out:
-	spin_unlock(&bundle->channel_lock);
 }
 
 /*
@@ -909,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
 {
 	struct rxrpc_bundle *bundle = conn->bundle;
 	unsigned int bindex;
-	bool need_drop = false;
 	int i;
 
 	_enter("C=%x", conn->debug_id);
@@ -917,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
 	if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
 		rxrpc_process_delayed_final_acks(conn, true);
 
-	spin_lock(&bundle->channel_lock);
 	bindex = conn->bundle_shift / RXRPC_MAXCALLS;
 	if (bundle->conns[bindex] == conn) {
 		_debug("clear slot %u", bindex);
 		bundle->conns[bindex] = NULL;
 		for (i = 0; i < RXRPC_MAXCALLS; i++)
 			clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
-		need_drop = true;
-	}
-	spin_unlock(&bundle->channel_lock);
-
-	if (need_drop) {
+		rxrpc_put_client_connection_id(bundle->local, conn);
 		rxrpc_deactivate_bundle(bundle);
 		rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
 	}
@@ -990,24 +734,16 @@ void rxrpc_discard_expired_client_conns(struct rxrpc_local *local)
 
 	_enter("");
 
-	if (list_empty(&local->idle_client_conns)) {
-		_leave(" [empty]");
-		return;
-	}
-
 	/* We keep an estimate of what the number of conns ought to be after
 	 * we've discarded some so that we don't overdo the discarding.
 	 */
 	nr_conns = atomic_read(&local->rxnet->nr_client_conns);
 
 next:
-	spin_lock(&local->client_conn_cache_lock);
-
-	if (list_empty(&local->idle_client_conns))
-		goto out;
-
-	conn = list_entry(local->idle_client_conns.next,
-			  struct rxrpc_connection, cache_link);
+	conn = list_first_entry_or_null(&local->idle_client_conns,
+					struct rxrpc_connection, cache_link);
+	if (!conn)
+		return;
 
 	if (!local->kill_all_client_conns) {
 		/* If the number of connections is over the reap limit, we
@@ -1032,8 +768,6 @@ next:
 	trace_rxrpc_client(conn, -1, rxrpc_client_discard);
 	list_del_init(&conn->cache_link);
 
-	spin_unlock(&local->client_conn_cache_lock);
-
 	rxrpc_unbundle_conn(conn);
 	/* Drop the ->cache_link ref */
 	rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
@@ -1053,8 +787,6 @@ not_yet_expired:
 	if (!local->kill_all_client_conns)
 		timer_reduce(&local->client_conn_reap_timer, conn_expires_at);
 
-out:
-	spin_unlock(&local->client_conn_cache_lock);
 	_leave("");
 }
 
@@ -1063,34 +795,19 @@ out:
  */
 void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
 {
-	struct rxrpc_connection *conn, *tmp;
-	LIST_HEAD(graveyard);
+	struct rxrpc_connection *conn;
 
 	_enter("");
 
-	spin_lock(&local->client_conn_cache_lock);
 	local->kill_all_client_conns = true;
-	spin_unlock(&local->client_conn_cache_lock);
 
 	del_timer_sync(&local->client_conn_reap_timer);
 
-	spin_lock(&local->client_conn_cache_lock);
-
-	list_for_each_entry_safe(conn, tmp, &local->idle_client_conns,
-				 cache_link) {
-		if (conn->local == local) {
-			atomic_dec(&conn->active);
-			trace_rxrpc_client(conn, -1, rxrpc_client_discard);
-			list_move(&conn->cache_link, &graveyard);
-		}
-	}
-
-	spin_unlock(&local->client_conn_cache_lock);
-
-	while (!list_empty(&graveyard)) {
-		conn = list_entry(graveyard.next,
-				  struct rxrpc_connection, cache_link);
+	while ((conn = list_first_entry_or_null(&local->idle_client_conns,
+						struct rxrpc_connection, cache_link))) {
 		list_del_init(&conn->cache_link);
+		atomic_dec(&conn->active);
+		trace_rxrpc_client(conn, -1, rxrpc_client_discard);
 		rxrpc_unbundle_conn(conn);
 		rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
 	}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 8d0b9ff0a5e1..44414e724415 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -100,9 +100,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	/* If the last call got moved on whilst we were waiting to run, just
 	 * ignore this packet.
 	 */
-	call_id = READ_ONCE(chan->last_call);
-	/* Sync with __rxrpc_disconnect_call() */
-	smp_rmb();
+	call_id = chan->last_call;
 	if (skb && call_id != sp->hdr.callNumber)
 		return;
 
@@ -119,9 +117,12 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 	iov[2].iov_base	= &ack_info;
 	iov[2].iov_len	= sizeof(ack_info);
 
+	serial = atomic_inc_return(&conn->serial);
+
 	pkt.whdr.epoch		= htonl(conn->proto.epoch);
 	pkt.whdr.cid		= htonl(conn->proto.cid | channel);
 	pkt.whdr.callNumber	= htonl(call_id);
+	pkt.whdr.serial		= htonl(serial);
 	pkt.whdr.seq		= 0;
 	pkt.whdr.type		= chan->last_type;
 	pkt.whdr.flags		= conn->out_clientflag;
@@ -158,31 +159,15 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
 		iov[0].iov_len += sizeof(pkt.ack);
 		len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
 		ioc = 3;
-		break;
-
-	default:
-		return;
-	}
-
-	/* Resync with __rxrpc_disconnect_call() and check that the last call
-	 * didn't get advanced whilst we were filling out the packets.
-	 */
-	smp_rmb();
-	if (READ_ONCE(chan->last_call) != call_id)
-		return;
-
-	serial = atomic_inc_return(&conn->serial);
-	pkt.whdr.serial = htonl(serial);
 
-	switch (chan->last_type) {
-	case RXRPC_PACKET_TYPE_ABORT:
-		break;
-	case RXRPC_PACKET_TYPE_ACK:
 		trace_rxrpc_tx_ack(chan->call_debug_id, serial,
 				   ntohl(pkt.ack.firstPacket),
 				   ntohl(pkt.ack.serial),
 				   pkt.ack.reason, 0);
 		break;
+
+	default:
+		return;
 	}
 
 	ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
@@ -207,12 +192,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
 
 	_enter("{%d},%x", conn->debug_id, conn->abort_code);
 
-	spin_lock(&conn->bundle->channel_lock);
-
 	for (i = 0; i < RXRPC_MAXCALLS; i++) {
-		call = rcu_dereference_protected(
-			conn->channels[i].call,
-			lockdep_is_held(&conn->bundle->channel_lock));
+		call = conn->channels[i].call;
 		if (call)
 			rxrpc_set_call_completion(call,
 						  conn->completion,
@@ -220,7 +201,6 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn)
 						  conn->error);
 	}
 
-	spin_unlock(&conn->bundle->channel_lock);
 	_leave("");
 }
 
@@ -316,9 +296,7 @@ again:
 		if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
 			continue;
 
-		smp_rmb(); /* vs rxrpc_disconnect_client_call */
-		ack_at = READ_ONCE(chan->final_ack_at);
-
+		ack_at = chan->final_ack_at;
 		if (time_before(j, ack_at) && !force) {
 			if (time_before(ack_at, next_j)) {
 				next_j = ack_at;
@@ -446,15 +424,8 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
 		if (conn->state != RXRPC_CONN_SERVICE)
 			break;
 
-		spin_lock(&conn->bundle->channel_lock);
-
 		for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
-			rxrpc_call_is_secure(
-				rcu_dereference_protected(
-					conn->channels[loop].call,
-					lockdep_is_held(&conn->bundle->channel_lock)));
-
-		spin_unlock(&conn->bundle->channel_lock);
+			rxrpc_call_is_secure(conn->channels[loop].call);
 		break;
 	}
 
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3d8c1dc6a82a..ac85d4644a3c 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
 		INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
 		INIT_LIST_HEAD(&conn->proc_link);
 		INIT_LIST_HEAD(&conn->link);
+		mutex_init(&conn->security_lock);
 		skb_queue_head_init(&conn->rx_queue);
 		conn->rxnet = rxnet;
 		conn->security = &rxrpc_no_security;
@@ -157,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
 
 	_enter("%d,%x", conn->debug_id, call->cid);
 
-	if (rcu_access_pointer(chan->call) == call) {
+	if (chan->call == call) {
 		/* Save the result of the call so that we can repeat it if necessary
 		 * through the channel, whilst disposing of the actual call record.
 		 */
@@ -177,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
 			break;
 		}
 
-		/* Sync with rxrpc_conn_retransmit(). */
-		smp_wmb();
 		chan->last_call = chan->call_id;
 		chan->call_id = chan->call_counter;
-
-		rcu_assign_pointer(chan->call, NULL);
+		chan->call = NULL;
 	}
 
 	_leave("");
@@ -210,10 +208,7 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
 	if (rxrpc_is_client_call(call)) {
 		rxrpc_disconnect_client_call(call->bundle, call);
 	} else {
-		spin_lock(&conn->bundle->channel_lock);
 		__rxrpc_disconnect_call(conn, call);
-		spin_unlock(&conn->bundle->channel_lock);
-
 		conn->idle_timestamp = jiffies;
 		if (atomic_dec_and_test(&conn->active))
 			rxrpc_set_service_reap_timer(conn->rxnet,
@@ -316,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
 		container_of(work, struct rxrpc_connection, destructor);
 	struct rxrpc_net *rxnet = conn->rxnet;
 
-	ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
-	       !rcu_access_pointer(conn->channels[1].call) &&
-	       !rcu_access_pointer(conn->channels[2].call) &&
-	       !rcu_access_pointer(conn->channels[3].call));
+	ASSERT(!conn->channels[0].call &&
+	       !conn->channels[1].call &&
+	       !conn->channels[2].call &&
+	       !conn->channels[3].call);
 	ASSERT(list_empty(&conn->cache_link));
 
 	del_timer_sync(&conn->timer);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 2a55a88b2a5b..f30323de82bd 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -11,7 +11,6 @@
 static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
 	.ref		= REFCOUNT_INIT(1),
 	.debug_id	= UINT_MAX,
-	.channel_lock	= __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
 };
 
 /*
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index a299cc34c140..9e9dfb2fc559 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -369,10 +369,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
 		return just_discard;
 	}
 
-	rcu_read_lock();
-	call = rxrpc_try_get_call(rcu_dereference(chan->call),
-				  rxrpc_call_get_input);
-	rcu_read_unlock();
+	call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input);
 
 	if (sp->hdr.callNumber > chan->call_id) {
 		if (rxrpc_to_client(sp)) {
@@ -453,6 +450,9 @@ int rxrpc_io_thread(void *data)
 			continue;
 		}
 
+		if (!list_empty(&local->new_client_calls))
+			rxrpc_connect_client_calls(local);
+
 		/* Process received packets and errors. */
 		if ((skb = __skb_dequeue(&rx_queue))) {
 			struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -492,7 +492,10 @@ int rxrpc_io_thread(void *data)
 		should_stop = kthread_should_stop();
 		if (!skb_queue_empty(&local->rx_queue) ||
 		    !list_empty(&local->call_attend_q) ||
-		    !list_empty(&local->conn_attend_q)) {
+		    !list_empty(&local->conn_attend_q) ||
+		    !list_empty(&local->new_client_calls) ||
+		    test_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+			     &local->client_conn_flags)) {
 			__set_current_state(TASK_RUNNING);
 			continue;
 		}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 9bc8d08ca12c..b8eaca5d9f22 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -117,7 +117,6 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
 		local->client_bundles = RB_ROOT;
 		spin_lock_init(&local->client_bundles_lock);
 		local->kill_all_client_conns = false;
-		spin_lock_init(&local->client_conn_cache_lock);
 		INIT_LIST_HEAD(&local->idle_client_conns);
 		timer_setup(&local->client_conn_reap_timer,
 			    rxrpc_client_conn_reap_timeout, 0);
@@ -133,7 +132,8 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
 		if (tmp == 0)
 			tmp = 1;
 		idr_set_cursor(&local->conn_ids, tmp);
-		spin_lock_init(&local->conn_lock);
+		INIT_LIST_HEAD(&local->new_client_calls);
+		spin_lock_init(&local->client_call_lock);
 
 		trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
 	}
@@ -435,7 +435,7 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
 	 * local endpoint.
 	 */
 	rxrpc_purge_queue(&local->rx_queue);
-	rxrpc_destroy_client_conn_ids(local);
+	rxrpc_purge_client_connections(local);
 }
 
 /*
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index c39ef94602ed..750158a085cd 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -12,6 +12,7 @@
 
 static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
 	[RXRPC_CONN_UNUSED]			= "Unused  ",
+	[RXRPC_CONN_CLIENT_UNSECURED]		= "ClUnsec ",
 	[RXRPC_CONN_CLIENT]			= "Client  ",
 	[RXRPC_CONN_SERVICE_PREALLOC]		= "SvPrealc",
 	[RXRPC_CONN_SERVICE_UNSECURED]		= "SvUnsec ",
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index dfb01e7b90fb..1bf571a66e02 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -1122,36 +1122,31 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 		goto protocol_error_free;
 	}
 
-	spin_lock(&conn->bundle->channel_lock);
 	for (i = 0; i < RXRPC_MAXCALLS; i++) {
-		struct rxrpc_call *call;
 		u32 call_id = ntohl(response->encrypted.call_id[i]);
+		u32 counter = READ_ONCE(conn->channels[i].call_counter);
 
 		if (call_id > INT_MAX) {
 			rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
 					 rxkad_abort_resp_bad_callid);
-			goto protocol_error_unlock;
+			goto protocol_error_free;
 		}
 
-		if (call_id < conn->channels[i].call_counter) {
+		if (call_id < counter) {
 			rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
 					 rxkad_abort_resp_call_ctr);
-			goto protocol_error_unlock;
+			goto protocol_error_free;
 		}
 
-		if (call_id > conn->channels[i].call_counter) {
-			call = rcu_dereference_protected(
-				conn->channels[i].call,
-				lockdep_is_held(&conn->bundle->channel_lock));
-			if (call && !__rxrpc_call_is_complete(call)) {
+		if (call_id > counter) {
+			if (conn->channels[i].call) {
 				rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
 						 rxkad_abort_resp_call_state);
-				goto protocol_error_unlock;
+				goto protocol_error_free;
 			}
 			conn->channels[i].call_counter = call_id;
 		}
 	}
-	spin_unlock(&conn->bundle->channel_lock);
 
 	if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
 		rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
@@ -1179,8 +1174,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
 	_leave(" = 0");
 	return 0;
 
-protocol_error_unlock:
-	spin_unlock(&conn->bundle->channel_lock);
 protocol_error_free:
 	kfree(ticket);
 protocol_error:
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 78af14694618..cd66634dffe6 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -97,38 +97,31 @@ found:
  */
 int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
 {
-	const struct rxrpc_security *sec;
 	struct rxrpc_key_token *token;
 	struct key *key = conn->key;
-	int ret;
+	int ret = 0;
 
 	_enter("{%d},{%x}", conn->debug_id, key_serial(key));
 
-	if (!key)
-		return 0;
-
-	ret = key_validate(key);
-	if (ret < 0)
-		return ret;
-
 	for (token = key->payload.data[0]; token; token = token->next) {
-		sec = rxrpc_security_lookup(token->security_index);
-		if (sec)
+		if (token->security_index == conn->security->security_index)
 			goto found;
 	}
 	return -EKEYREJECTED;
 
 found:
-	conn->security = sec;
-
-	ret = conn->security->init_connection_security(conn, token);
-	if (ret < 0) {
-		conn->security = &rxrpc_no_security;
-		return ret;
+	mutex_lock(&conn->security_lock);
+	if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+		ret = conn->security->init_connection_security(conn, token);
+		if (ret == 0) {
+			spin_lock(&conn->state_lock);
+			if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
+				conn->state = RXRPC_CONN_CLIENT;
+			spin_unlock(&conn->state_lock);
+		}
 	}
-
-	_leave(" = 0");
-	return 0;
+	mutex_unlock(&conn->security_lock);
+	return ret;
 }
 
 /*
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index a5d0005b7ce5..da49fcf1c456 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -38,6 +38,60 @@ bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
 	return false;
 }
 
+/*
+ * Wait for a call to become connected.  Interruption here doesn't cause the
+ * call to be aborted.
+ */
+static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo)
+{
+	DECLARE_WAITQUEUE(myself, current);
+	int ret = 0;
+
+	_enter("%d", call->debug_id);
+
+	if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+		return call->error;
+
+	add_wait_queue_exclusive(&call->waitq, &myself);
+
+	for (;;) {
+		ret = call->error;
+		if (ret < 0)
+			break;
+
+		switch (call->interruptibility) {
+		case RXRPC_INTERRUPTIBLE:
+		case RXRPC_PREINTERRUPTIBLE:
+			set_current_state(TASK_INTERRUPTIBLE);
+			break;
+		case RXRPC_UNINTERRUPTIBLE:
+		default:
+			set_current_state(TASK_UNINTERRUPTIBLE);
+			break;
+		}
+		if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) {
+			ret = call->error;
+			break;
+		}
+		if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+		     call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
+		    signal_pending(current)) {
+			ret = sock_intr_errno(*timeo);
+			break;
+		}
+		*timeo = schedule_timeout(*timeo);
+	}
+
+	remove_wait_queue(&call->waitq, &myself);
+	__set_current_state(TASK_RUNNING);
+
+	if (ret == 0 && rxrpc_call_is_complete(call))
+		ret = call->error;
+
+	_leave(" = %d", ret);
+	return ret;
+}
+
 /*
  * Return true if there's sufficient Tx queue space.
  */
@@ -239,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
 
 	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
 
+	ret = rxrpc_wait_to_be_connected(call, &timeo);
+	if (ret < 0)
+		return ret;
+
+	if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+		ret = rxrpc_init_client_conn_security(call->conn);
+		if (ret < 0)
+			return ret;
+	}
+
 	/* this should be in poll */
 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
-- 
cgit v1.3.1


From 7c3d5c20dc169e55064f7f38c1c56cfbc39ee5b2 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 3 Oct 2022 11:25:34 +0200
Subject: thermal/core: Add a generic thermal_zone_get_trip() function

The thermal_zone_device_ops structure defines a set of ops family,
get_trip_temp(), get_trip_hyst(), get_trip_type(). Each of them is
returning a property of a trip point.

The result is the code is calling the ops everywhere to get a trip
point which is supposed to be defined in the backend driver. It is a
non-sense as a thermal trip can be generic and used by the backend
driver to declare its trip points.

Part of the thermal framework has been changed and all the OF thermal
drivers are using the same definition for the trip point and use a
thermal zone registration variant to pass those trip points which are
part of the thermal zone device structure.

Consequently, we can use a generic function to get the trip points
when they are stored in the thermal zone device structure.

This approach can be generalized to all the drivers and we can get rid
of the ops->get_trip_*. That will result to a much more simpler code
and make possible to rework how the thermal trip are handled in the
thermal core framework as discussed previously.

This change adds a function thermal_zone_get_trip() where we get the
thermal trip point structure which contains all the properties (type,
temp, hyst) instead of doing multiple calls to ops->get_trip_*.

That opens the door for trip point extension with more attributes. For
instance, replacing the trip points disabled bitmask with a 'disabled'
field in the structure.

Here we replace all the calls to ops->get_trip_* in the thermal core
code with a call to the thermal_zone_get_trip() function.

The thermal zone ops defines a callback to retrieve the critical
temperature. As the trip handling is being reworked, all the trip
points will be the same whatever the driver and consequently finding
the critical trip temperature will be just a loop to search for a
critical trip point type.

Provide such a generic function, so we encapsulate the ops
get_crit_temp() which can be removed when all the backend drivers are
using the generic trip points handling.

While at it, add the thermal_zone_get_num_trips() to encapsulate the
code more and reduce the grip with the thermal framework internals.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Zhang Rui <rui.zhang@intel.com>
Link: https://lore.kernel.org/r/20221003092602.1323944-2-daniel.lezcano@linaro.org
---
 drivers/thermal/thermal_core.c    | 114 ++++++++++++++++++++++++++++++--------
 drivers/thermal/thermal_core.h    |   2 +
 drivers/thermal/thermal_helpers.c |  28 +++++-----
 drivers/thermal/thermal_netlink.c |  19 +++----
 drivers/thermal/thermal_sysfs.c   |  64 +++++++++------------
 include/linux/thermal.h           |   7 +++
 6 files changed, 147 insertions(+), 87 deletions(-)

(limited to 'include')

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index f17ab2316dbd..b043c315c005 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -344,35 +344,31 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
 		tz->ops->critical(tz);
 }
 
-static void handle_thermal_trip(struct thermal_zone_device *tz, int trip)
+static void handle_thermal_trip(struct thermal_zone_device *tz, int trip_id)
 {
-	enum thermal_trip_type type;
-	int trip_temp, hyst = 0;
+	struct thermal_trip trip;
 
 	/* Ignore disabled trip points */
-	if (test_bit(trip, &tz->trips_disabled))
+	if (test_bit(trip_id, &tz->trips_disabled))
 		return;
 
-	tz->ops->get_trip_temp(tz, trip, &trip_temp);
-	tz->ops->get_trip_type(tz, trip, &type);
-	if (tz->ops->get_trip_hyst)
-		tz->ops->get_trip_hyst(tz, trip, &hyst);
+	__thermal_zone_get_trip(tz, trip_id, &trip);
 
 	if (tz->last_temperature != THERMAL_TEMP_INVALID) {
-		if (tz->last_temperature < trip_temp &&
-		    tz->temperature >= trip_temp)
-			thermal_notify_tz_trip_up(tz->id, trip,
+		if (tz->last_temperature < trip.temperature &&
+		    tz->temperature >= trip.temperature)
+			thermal_notify_tz_trip_up(tz->id, trip_id,
 						  tz->temperature);
-		if (tz->last_temperature >= trip_temp &&
-		    tz->temperature < (trip_temp - hyst))
-			thermal_notify_tz_trip_down(tz->id, trip,
+		if (tz->last_temperature >= trip.temperature &&
+		    tz->temperature < (trip.temperature - trip.hysteresis))
+			thermal_notify_tz_trip_down(tz->id, trip_id,
 						    tz->temperature);
 	}
 
-	if (type == THERMAL_TRIP_CRITICAL || type == THERMAL_TRIP_HOT)
-		handle_critical_trips(tz, trip, trip_temp, type);
+	if (trip.type == THERMAL_TRIP_CRITICAL || trip.type == THERMAL_TRIP_HOT)
+		handle_critical_trips(tz, trip_id, trip.temperature, trip.type);
 	else
-		handle_non_critical_trips(tz, trip);
+		handle_non_critical_trips(tz, trip_id);
 }
 
 static void update_temperature(struct thermal_zone_device *tz)
@@ -1159,6 +1155,79 @@ static void thermal_set_delay_jiffies(unsigned long *delay_jiffies, int delay_ms
 		*delay_jiffies = round_jiffies(*delay_jiffies);
 }
 
+int thermal_zone_get_num_trips(struct thermal_zone_device *tz)
+{
+	return tz->num_trips;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_num_trips);
+
+int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp)
+{
+	int i, ret = -EINVAL;
+
+	if (tz->ops->get_crit_temp)
+		return tz->ops->get_crit_temp(tz, temp);
+
+	if (!tz->trips)
+		return -EINVAL;
+
+	mutex_lock(&tz->lock);
+
+	for (i = 0; i < tz->num_trips; i++) {
+		if (tz->trips[i].type == THERMAL_TRIP_CRITICAL) {
+			*temp = tz->trips[i].temperature;
+			ret = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&tz->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_crit_temp);
+
+int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
+			    struct thermal_trip *trip)
+{
+	int ret;
+
+	if (!tz || trip_id < 0 || trip_id >= tz->num_trips || !trip)
+		return -EINVAL;
+
+	if (tz->trips) {
+		*trip = tz->trips[trip_id];
+		return 0;
+	}
+
+	if (tz->ops->get_trip_hyst) {
+		ret = tz->ops->get_trip_hyst(tz, trip_id, &trip->hysteresis);
+		if (ret)
+			return ret;
+	} else {
+		trip->hysteresis = 0;
+	}
+
+	ret = tz->ops->get_trip_temp(tz, trip_id, &trip->temperature);
+	if (ret)
+		return ret;
+
+	return tz->ops->get_trip_type(tz, trip_id, &trip->type);
+}
+
+int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
+			  struct thermal_trip *trip)
+{
+	int ret;
+
+	mutex_lock(&tz->lock);
+	ret = __thermal_zone_get_trip(tz, trip_id, trip);
+	mutex_unlock(&tz->lock);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(thermal_zone_get_trip);
+
 /**
  * thermal_zone_device_register_with_trips() - register a new thermal zone device
  * @type:	the thermal zone device type
@@ -1191,8 +1260,6 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
 					int polling_delay)
 {
 	struct thermal_zone_device *tz;
-	enum thermal_trip_type trip_type;
-	int trip_temp;
 	int id;
 	int result;
 	int count;
@@ -1232,7 +1299,7 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
 		return ERR_PTR(-EINVAL);
 	}
 
-	if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp))
+	if (num_trips > 0 && (!ops->get_trip_type || !ops->get_trip_temp) && !trips)
 		return ERR_PTR(-EINVAL);
 
 	tz = kzalloc(sizeof(*tz), GFP_KERNEL);
@@ -1283,9 +1350,10 @@ thermal_zone_device_register_with_trips(const char *type, struct thermal_trip *t
 		goto release_device;
 
 	for (count = 0; count < num_trips; count++) {
-		if (tz->ops->get_trip_type(tz, count, &trip_type) ||
-		    tz->ops->get_trip_temp(tz, count, &trip_temp) ||
-		    !trip_temp)
+		struct thermal_trip trip;
+
+		result = thermal_zone_get_trip(tz, count, &trip);
+		if (result)
 			set_bit(count, &tz->trips_disabled);
 	}
 
diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h
index b834cb273429..5f475b0a8c2f 100644
--- a/drivers/thermal/thermal_core.h
+++ b/drivers/thermal/thermal_core.h
@@ -114,6 +114,8 @@ void __thermal_zone_device_update(struct thermal_zone_device *tz,
 
 /* Helpers */
 void __thermal_zone_set_trips(struct thermal_zone_device *tz);
+int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
+			    struct thermal_trip *trip);
 int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp);
 
 /* sysfs I/F */
diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c
index 56aa2e88f34f..8977d5ddc23c 100644
--- a/drivers/thermal/thermal_helpers.c
+++ b/drivers/thermal/thermal_helpers.c
@@ -83,7 +83,7 @@ int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
 	int ret = -EINVAL;
 	int count;
 	int crit_temp = INT_MAX;
-	enum thermal_trip_type type;
+	struct thermal_trip trip;
 
 	lockdep_assert_held(&tz->lock);
 
@@ -91,10 +91,9 @@ int __thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp)
 
 	if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
 		for (count = 0; count < tz->num_trips; count++) {
-			ret = tz->ops->get_trip_type(tz, count, &type);
-			if (!ret && type == THERMAL_TRIP_CRITICAL) {
-				ret = tz->ops->get_trip_temp(tz, count,
-						&crit_temp);
+			ret = __thermal_zone_get_trip(tz, count, &trip);
+			if (!ret && trip.type == THERMAL_TRIP_CRITICAL) {
+				crit_temp = trip.temperature;
 				break;
 			}
 		}
@@ -164,29 +163,30 @@ EXPORT_SYMBOL_GPL(thermal_zone_get_temp);
  */
 void __thermal_zone_set_trips(struct thermal_zone_device *tz)
 {
-	int low = -INT_MAX;
-	int high = INT_MAX;
-	int trip_temp, hysteresis;
+	struct thermal_trip trip;
+	int low = -INT_MAX, high = INT_MAX;
 	int i, ret;
 
 	lockdep_assert_held(&tz->lock);
 
-	if (!tz->ops->set_trips || !tz->ops->get_trip_hyst)
+	if (!tz->ops->set_trips)
 		return;
 
 	for (i = 0; i < tz->num_trips; i++) {
 		int trip_low;
 
-		tz->ops->get_trip_temp(tz, i, &trip_temp);
-		tz->ops->get_trip_hyst(tz, i, &hysteresis);
+		ret = __thermal_zone_get_trip(tz, i , &trip);
+		if (ret)
+			return;
 
-		trip_low = trip_temp - hysteresis;
+		trip_low = trip.temperature - trip.hysteresis;
 
 		if (trip_low < tz->temperature && trip_low > low)
 			low = trip_low;
 
-		if (trip_temp > tz->temperature && trip_temp < high)
-			high = trip_temp;
+		if (trip.temperature > tz->temperature &&
+		    trip.temperature < high)
+			high = trip.temperature;
 	}
 
 	/* No need to change trip points */
diff --git a/drivers/thermal/thermal_netlink.c b/drivers/thermal/thermal_netlink.c
index e2d78a996b5f..75943b06dbe7 100644
--- a/drivers/thermal/thermal_netlink.c
+++ b/drivers/thermal/thermal_netlink.c
@@ -452,7 +452,8 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p)
 	struct sk_buff *msg = p->msg;
 	struct thermal_zone_device *tz;
 	struct nlattr *start_trip;
-	int i, id;
+	struct thermal_trip trip;
+	int ret, i, id;
 
 	if (!p->attrs[THERMAL_GENL_ATTR_TZ_ID])
 		return -EINVAL;
@@ -471,18 +472,14 @@ static int thermal_genl_cmd_tz_get_trip(struct param *p)
 
 	for (i = 0; i < tz->num_trips; i++) {
 
-		enum thermal_trip_type type;
-		int temp, hyst = 0;
-
-		tz->ops->get_trip_type(tz, i, &type);
-		tz->ops->get_trip_temp(tz, i, &temp);
-		if (tz->ops->get_trip_hyst)
-			tz->ops->get_trip_hyst(tz, i, &hyst);
+		ret = __thermal_zone_get_trip(tz, i, &trip);
+		if (ret)
+			goto out_cancel_nest;
 
 		if (nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_ID, i) ||
-		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, type) ||
-		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TEMP, temp) ||
-		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_HYST, hyst))
+		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TYPE, trip.type) ||
+		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_TEMP, trip.temperature) ||
+		    nla_put_u32(msg, THERMAL_GENL_ATTR_TZ_TRIP_HYST, trip.hysteresis))
 			goto out_cancel_nest;
 	}
 
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index d97f0bc0a26b..137bbf6adbd6 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -83,27 +83,25 @@ trip_point_type_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	enum thermal_trip_type type;
-	int trip, result;
+	struct thermal_trip trip;
+	int trip_id, result;
 
-	if (!tz->ops->get_trip_type)
-		return -EPERM;
-
-	if (sscanf(attr->attr.name, "trip_point_%d_type", &trip) != 1)
+	if (sscanf(attr->attr.name, "trip_point_%d_type", &trip_id) != 1)
 		return -EINVAL;
 
 	mutex_lock(&tz->lock);
 
 	if (device_is_registered(dev))
-		result = tz->ops->get_trip_type(tz, trip, &type);
+		result = __thermal_zone_get_trip(tz, trip_id, &trip);
 	else
 		result = -ENODEV;
 
 	mutex_unlock(&tz->lock);
+
 	if (result)
 		return result;
 
-	switch (type) {
+	switch (trip.type) {
 	case THERMAL_TRIP_CRITICAL:
 		return sprintf(buf, "critical\n");
 	case THERMAL_TRIP_HOT:
@@ -122,17 +120,16 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
 		      const char *buf, size_t count)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature, hyst = 0;
-	enum thermal_trip_type type;
+	struct thermal_trip trip;
+	int trip_id, ret;
 
 	if (!tz->ops->set_trip_temp && !tz->trips)
 		return -EPERM;
 
-	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip) != 1)
+	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip_id) != 1)
 		return -EINVAL;
 
-	if (kstrtoint(buf, 10, &temperature))
+	if (kstrtoint(buf, 10, &trip.temperature))
 		return -EINVAL;
 
 	mutex_lock(&tz->lock);
@@ -143,25 +140,20 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
 	}
 
 	if (tz->ops->set_trip_temp) {
-		ret = tz->ops->set_trip_temp(tz, trip, temperature);
+		ret = tz->ops->set_trip_temp(tz, trip_id, trip.temperature);
 		if (ret)
 			goto unlock;
 	}
 
 	if (tz->trips)
-		tz->trips[trip].temperature = temperature;
-
-	if (tz->ops->get_trip_hyst) {
-		ret = tz->ops->get_trip_hyst(tz, trip, &hyst);
-		if (ret)
-			goto unlock;
-	}
+		tz->trips[trip_id].temperature = trip.temperature;
 
-	ret = tz->ops->get_trip_type(tz, trip, &type);
+	ret = __thermal_zone_get_trip(tz, trip_id, &trip);
 	if (ret)
 		goto unlock;
 
-	thermal_notify_tz_trip_change(tz->id, trip, type, temperature, hyst);
+	thermal_notify_tz_trip_change(tz->id, trip_id, trip.type,
+				      trip.temperature, trip.hysteresis);
 
 	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
 
@@ -179,19 +171,16 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
+	struct thermal_trip trip;
+	int trip_id, ret;
 
-	if (!tz->ops->get_trip_temp)
-		return -EPERM;
-
-	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip) != 1)
+	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip_id) != 1)
 		return -EINVAL;
 
 	mutex_lock(&tz->lock);
 
 	if (device_is_registered(dev))
-		ret = tz->ops->get_trip_temp(tz, trip, &temperature);
+		ret = __thermal_zone_get_trip(tz, trip_id, &trip);
 	else
 		ret = -ENODEV;
 
@@ -200,7 +189,7 @@ trip_point_temp_show(struct device *dev, struct device_attribute *attr,
 	if (ret)
 		return ret;
 
-	return sprintf(buf, "%d\n", temperature);
+	return sprintf(buf, "%d\n", trip.temperature);
 }
 
 static ssize_t
@@ -248,25 +237,22 @@ trip_point_hyst_show(struct device *dev, struct device_attribute *attr,
 		     char *buf)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
+	struct thermal_trip trip;
+	int trip_id, ret;
 
-	if (!tz->ops->get_trip_hyst)
-		return -EPERM;
-
-	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip) != 1)
+	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip_id) != 1)
 		return -EINVAL;
 
 	mutex_lock(&tz->lock);
 
 	if (device_is_registered(dev))
-		ret = tz->ops->get_trip_hyst(tz, trip, &temperature);
+		ret = __thermal_zone_get_trip(tz, trip_id, &trip);
 	else
 		ret = -ENODEV;
 
 	mutex_unlock(&tz->lock);
 
-	return ret ? ret : sprintf(buf, "%d\n", temperature);
+	return ret ? ret : sprintf(buf, "%d\n", trip.hysteresis);
 }
 
 static ssize_t
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 5e093602e8fc..2198b3631f61 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -334,6 +334,13 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev,
 }
 #endif
 
+int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
+			  struct thermal_trip *trip);
+
+int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
+
+int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);
+
 #ifdef CONFIG_THERMAL
 struct thermal_zone_device *thermal_zone_device_register(const char *, int, int,
 		void *, struct thermal_zone_device_ops *,
-- 
cgit v1.3.1


From 2e38a2a981b24a9e86e687d32708a3d02707c8f6 Mon Sep 17 00:00:00 2001
From: Daniel Lezcano <daniel.lezcano@linaro.org>
Date: Mon, 3 Oct 2022 11:25:36 +0200
Subject: thermal/core: Add a generic thermal_zone_set_trip() function

The thermal zone ops defines a set_trip callback where we can invoke
the backend driver to set an interrupt for the next trip point
temperature being crossed the way up or down, or setting the low level
with the hysteresis.

The ops is only called from the thermal sysfs code where the userspace
has the ability to modify a trip point characteristic.

With the effort of encapsulating the thermal framework core code,
let's create a thermal_zone_set_trip() which is the writable side of
the thermal_zone_get_trip() and put there all the ops encapsulation.

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Acked-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Link: https://lore.kernel.org/r/20221003092602.1323944-4-daniel.lezcano@linaro.org
---
 drivers/thermal/thermal_core.c  | 39 ++++++++++++++++++++++++++++
 drivers/thermal/thermal_sysfs.c | 56 +++++++++++------------------------------
 include/linux/thermal.h         |  3 +++
 3 files changed, 57 insertions(+), 41 deletions(-)

(limited to 'include')

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index b043c315c005..c24c9efcd175 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1228,6 +1228,45 @@ int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 }
 EXPORT_SYMBOL_GPL(thermal_zone_get_trip);
 
+int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
+			  const struct thermal_trip *trip)
+{
+	struct thermal_trip t;
+	int ret;
+
+	if (!tz->ops->set_trip_temp && !tz->ops->set_trip_hyst && !tz->trips)
+		return -EINVAL;
+
+	ret = __thermal_zone_get_trip(tz, trip_id, &t);
+	if (ret)
+		return ret;
+
+	if (t.type != trip->type)
+		return -EINVAL;
+
+	if (t.temperature != trip->temperature && tz->ops->set_trip_temp) {
+		ret = tz->ops->set_trip_temp(tz, trip_id, trip->temperature);
+		if (ret)
+			return ret;
+	}
+
+	if (t.hysteresis != trip->hysteresis && tz->ops->set_trip_hyst) {
+		ret = tz->ops->set_trip_hyst(tz, trip_id, trip->hysteresis);
+		if (ret)
+			return ret;
+	}
+
+	if (tz->trips && (t.temperature != trip->temperature || t.hysteresis != trip->hysteresis))
+		tz->trips[trip_id] = *trip;
+
+	thermal_notify_tz_trip_change(tz->id, trip_id, trip->type,
+				      trip->temperature, trip->hysteresis);
+
+	__thermal_zone_device_update(tz, THERMAL_TRIP_CHANGED);
+	
+	return 0;
+}
+
 /**
  * thermal_zone_device_register_with_trips() - register a new thermal zone device
  * @type:	the thermal zone device type
diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
index d2d450076090..cef860deaf91 100644
--- a/drivers/thermal/thermal_sysfs.c
+++ b/drivers/thermal/thermal_sysfs.c
@@ -123,15 +123,9 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
 	struct thermal_trip trip;
 	int trip_id, ret;
 
-	if (!tz->ops->set_trip_temp && !tz->trips)
-		return -EPERM;
-
 	if (sscanf(attr->attr.name, "trip_point_%d_temp", &trip_id) != 1)
 		return -EINVAL;
 
-	if (kstrtoint(buf, 10, &trip.temperature))
-		return -EINVAL;
-
 	mutex_lock(&tz->lock);
 
 	if (!device_is_registered(dev)) {
@@ -139,31 +133,19 @@ trip_point_temp_store(struct device *dev, struct device_attribute *attr,
 		goto unlock;
 	}
 
-	if (tz->ops->set_trip_temp) {
-		ret = tz->ops->set_trip_temp(tz, trip_id, trip.temperature);
-		if (ret)
-			goto unlock;
-	}
-
-	if (tz->trips)
-		tz->trips[trip_id].temperature = trip.temperature;
-
 	ret = __thermal_zone_get_trip(tz, trip_id, &trip);
 	if (ret)
 		goto unlock;
 
-	thermal_notify_tz_trip_change(tz->id, trip_id, trip.type,
-				      trip.temperature, trip.hysteresis);
-
-	__thermal_zone_device_update(tz, THERMAL_EVENT_UNSPECIFIED);
+	ret = kstrtoint(buf, 10, &trip.temperature);
+	if (ret)
+		goto unlock;
 
+	ret = thermal_zone_set_trip(tz, trip_id, &trip);
 unlock:
 	mutex_unlock(&tz->lock);
-
-	if (ret)
-		return ret;
-
-	return count;
+	
+	return ret ? ret : count;
 }
 
 static ssize_t
@@ -197,16 +179,13 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
 		      const char *buf, size_t count)
 {
 	struct thermal_zone_device *tz = to_thermal_zone(dev);
-	int trip, ret;
-	int temperature;
-
-	if (!tz->ops->set_trip_hyst)
-		return -EPERM;
+	struct thermal_trip trip;
+	int trip_id, ret;
 
-	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip) != 1)
+	if (sscanf(attr->attr.name, "trip_point_%d_hyst", &trip_id) != 1)
 		return -EINVAL;
 
-	if (kstrtoint(buf, 10, &temperature))
+	if (kstrtoint(buf, 10, &trip.hysteresis))
 		return -EINVAL;
 
 	mutex_lock(&tz->lock);
@@ -216,16 +195,11 @@ trip_point_hyst_store(struct device *dev, struct device_attribute *attr,
 		goto unlock;
 	}
 
-	/*
-	 * We are not doing any check on the 'temperature' value
-	 * here. The driver implementing 'set_trip_hyst' has to
-	 * take care of this.
-	 */
-	ret = tz->ops->set_trip_hyst(tz, trip, temperature);
-
-	if (!ret)
-		__thermal_zone_set_trips(tz);
-
+	ret = __thermal_zone_get_trip(tz, trip_id, &trip);
+	if (ret)
+		goto unlock;
+	
+	ret = thermal_zone_set_trip(tz, trip_id, &trip);
 unlock:
 	mutex_unlock(&tz->lock);
 
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index 2198b3631f61..e2797f314d99 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -337,6 +337,9 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev,
 int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 			  struct thermal_trip *trip);
 
+int thermal_zone_set_trip(struct thermal_zone_device *tz, int trip_id,
+			  const struct thermal_trip *trip);
+
 int thermal_zone_get_num_trips(struct thermal_zone_device *tz);
 
 int thermal_zone_get_crit_temp(struct thermal_zone_device *tz, int *temp);
-- 
cgit v1.3.1


From e6ec64f85237b48c071158617cfc954a30285fc7 Mon Sep 17 00:00:00 2001
From: Johan Hovold <johan+linaro@kernel.org>
Date: Wed, 14 Dec 2022 14:16:14 +0100
Subject: thermal/drivers/qcom: Fix set_trip_temp() deadlock

The set_trip_temp() callback is used when changing the trip temperature
through sysfs. As it is called with the thermal-zone-device lock held
it must not use thermal_zone_get_trip() directly or it will deadlock.

Fixes: 78c3e2429be8 ("thermal/drivers/qcom: Use generic thermal_zone_get_trip() function")
Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
Link: https://lore.kernel.org/r/20221214131617.2447-2-johan+linaro@kernel.org
Signed-off-by: Daniel Lezcano <daniel.lezcano@kernel.org>
---
 drivers/thermal/qcom/qcom-spmi-temp-alarm.c | 2 +-
 drivers/thermal/thermal_core.c              | 1 +
 include/linux/thermal.h                     | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
index 58055a7abaf6..bfaec74f13b2 100644
--- a/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
+++ b/drivers/thermal/qcom/qcom-spmi-temp-alarm.c
@@ -270,7 +270,7 @@ static int qpnp_tm_set_trip_temp(struct thermal_zone_device *tz, int trip_id, in
 	struct thermal_trip trip;
 	int ret;
 
-	ret = thermal_zone_get_trip(chip->tz_dev, trip_id, &trip);
+	ret = __thermal_zone_get_trip(chip->tz_dev, trip_id, &trip);
 	if (ret)
 		return ret;
 
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index c24c9efcd175..d9a3d9566d73 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -1214,6 +1214,7 @@ int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 
 	return tz->ops->get_trip_type(tz, trip_id, &trip->type);
 }
+EXPORT_SYMBOL_GPL(__thermal_zone_get_trip);
 
 int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 			  struct thermal_trip *trip)
diff --git a/include/linux/thermal.h b/include/linux/thermal.h
index e2797f314d99..30353e4b1424 100644
--- a/include/linux/thermal.h
+++ b/include/linux/thermal.h
@@ -334,6 +334,8 @@ static inline void devm_thermal_of_zone_unregister(struct device *dev,
 }
 #endif
 
+int __thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
+			    struct thermal_trip *trip);
 int thermal_zone_get_trip(struct thermal_zone_device *tz, int trip_id,
 			  struct thermal_trip *trip);
 
-- 
cgit v1.3.1


From f3dc61cde80d48751999c4cb46daf3b2185e6895 Mon Sep 17 00:00:00 2001
From: Will Deacon <will@kernel.org>
Date: Fri, 25 Nov 2022 10:18:26 +0000
Subject: firmware/psci: Fix MEM_PROTECT_RANGE function numbers

PSCI v1.1 offers 32-bit and 64-bit variants of the MEM_PROTECT_RANGE
call using function identifier 20.

Fix the incorrect definitions of the MEM_PROTECT_CHECK_RANGE calls in
the PSCI UAPI header.

Cc: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
Cc: Lorenzo Pieralisi <lpieralisi@kernel.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging")
Acked-by: Marc Zyngier <maz@kernel.org>
Acked-by: Mark Rutland <mark.rutland@arm.com>
Link: https://lore.kernel.org/r/20221125101826.22404-1-will@kernel.org
Signed-off-by: Will Deacon <will@kernel.org>
---
 include/uapi/linux/psci.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h
index 3511095c2702..42a40ad3fb62 100644
--- a/include/uapi/linux/psci.h
+++ b/include/uapi/linux/psci.h
@@ -58,7 +58,7 @@
 
 #define PSCI_1_1_FN_SYSTEM_RESET2		PSCI_0_2_FN(18)
 #define PSCI_1_1_FN_MEM_PROTECT			PSCI_0_2_FN(19)
-#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN(19)
+#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN(20)
 
 #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND	PSCI_0_2_FN64(12)
 #define PSCI_1_0_FN64_NODE_HW_STATE		PSCI_0_2_FN64(13)
@@ -67,7 +67,7 @@
 #define PSCI_1_0_FN64_STAT_COUNT		PSCI_0_2_FN64(17)
 
 #define PSCI_1_1_FN64_SYSTEM_RESET2		PSCI_0_2_FN64(18)
-#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN64(19)
+#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE	PSCI_0_2_FN64(20)
 
 /* PSCI v0.2 power state encoding for CPU_SUSPEND function */
 #define PSCI_0_2_POWER_STATE_ID_MASK		0xffff
-- 
cgit v1.3.1


From da2e552b469a0cd130ff70a88ccc4139da428a65 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@nvidia.com>
Date: Mon, 28 Nov 2022 19:05:47 +0200
Subject: net/mlx5: Fix command stats access after free

Command may fail while driver is reloading and can't accept FW commands
till command interface is reinitialized. Such command failure is being
logged to command stats. This results in NULL pointer access as command
stats structure is being freed and reallocated during mlx5 devlink
reload (see kernel log below).

Fix it by making command stats statically allocated on driver probe.

Kernel log:
[ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0
[ 2394.810610] PGD 0 P4D 0
[ 2394.811811] Oops: 0002 [#1] SMP NOPTI
...
[ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0
...
[ 2394.829505] Call Trace:
[ 2394.830667]  _raw_spin_lock_irq+0x23/0x26
[ 2394.831858]  cmd_status_err+0x55/0x110 [mlx5_core]
[ 2394.833020]  mlx5_access_reg+0xe7/0x150 [mlx5_core]
[ 2394.834175]  mlx5_query_port_ptys+0x78/0xa0 [mlx5_core]
[ 2394.835337]  mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core]
[ 2394.836454]  ? kmem_cache_alloc_trace+0x140/0x1c0
[ 2394.837562]  __rh_call_get_link_ksettings+0x33/0x100
[ 2394.838663]  ? __rtnl_unlock+0x25/0x50
[ 2394.839755]  __ethtool_get_link_ksettings+0x72/0x150
[ 2394.840862]  duplex_show+0x6e/0xc0
[ 2394.841963]  dev_attr_show+0x1c/0x40
[ 2394.843048]  sysfs_kf_seq_show+0x9b/0x100
[ 2394.844123]  seq_read+0x153/0x410
[ 2394.845187]  vfs_read+0x91/0x140
[ 2394.846226]  ksys_read+0x4f/0xb0
[ 2394.847234]  do_syscall_64+0x5b/0x1a0
[ 2394.848228]  entry_SYSCALL_64_after_hwframe+0x65/0xca

Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 13 ++-----------
 include/linux/mlx5/driver.h                   |  2 +-
 2 files changed, 3 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index d3ca745d107d..c837103a9ee3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		return -EINVAL;
 	}
 
-	cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL);
-	if (!cmd->stats)
-		return -ENOMEM;
-
 	cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
-	if (!cmd->pool) {
-		err = -ENOMEM;
-		goto dma_pool_err;
-	}
+	if (!cmd->pool)
+		return -ENOMEM;
 
 	err = alloc_cmd_page(dev, cmd);
 	if (err)
@@ -2268,8 +2262,6 @@ err_free_page:
 
 err_free_pool:
 	dma_pool_destroy(cmd->pool);
-dma_pool_err:
-	kvfree(cmd->stats);
 	return err;
 }
 
@@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
 	destroy_msg_cache(dev);
 	free_cmd_page(dev, cmd);
 	dma_pool_destroy(cmd->pool);
-	kvfree(cmd->stats);
 }
 
 void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d476255c9a3f..76ef2e4fde38 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -315,7 +315,7 @@ struct mlx5_cmd {
 	struct mlx5_cmd_debug dbg;
 	struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
 	int checksum_disabled;
-	struct mlx5_cmd_stats *stats;
+	struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
 };
 
 struct mlx5_cmd_mailbox {
-- 
cgit v1.3.1


From f64e4275ef7407d5c3eca20436519bbd1f796e40 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Tue, 10 Jan 2023 16:30:28 +0100
Subject: ACPI: Fix selecting wrong ACPI fwnode for the iGPU on some Dell
 laptops

The Dell Latitude E6430 both with and without the optional NVidia dGPU
has a bug in its ACPI tables which is causing Linux to assign the wrong
ACPI fwnode / companion to the pci_device for the i915 iGPU.

Specifically under the PCI root bridge there are these 2 ACPI Device()s :

 Scope (_SB.PCI0)
 {
     Device (GFX0)
     {
         Name (_ADR, 0x00020000)  // _ADR: Address
     }

     ...

     Device (VID)
     {
         Name (_ADR, 0x00020000)  // _ADR: Address
         ...

         Method (_DOS, 1, NotSerialized)  // _DOS: Disable Output Switching
         {
             VDP8 = Arg0
             VDP1 (One, VDP8)
         }

         Method (_DOD, 0, NotSerialized)  // _DOD: Display Output Devices
         {
             ...
         }
         ...
     }
 }

The non-functional GFX0 ACPI device is a problem, because this gets
returned as ACPI companion-device by acpi_find_child_device() for the iGPU.

This is a long standing problem and the i915 driver does use the ACPI
companion for some things, but works fine without it.

However since commit 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
acpi_get_pci_dev() relies on the physical-node pointer in the acpi_device
and that is set on the wrong acpi_device because of the wrong
acpi_find_child_device() return. This breaks the ACPI video code,
leading to non working backlight control in some cases.

Add a type.backlight flag, mark ACPI video bus devices with this and make
find_child_checks() return a higher score for children with this flag set,
so that it picks the right companion-device.

Fixes: 63f534b8bad9 ("ACPI: PCI: Rework acpi_get_pci_dev()")
Co-developed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Cc: 6.1+ <stable@vger.kernel.org> # 6.1+
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/glue.c     | 14 ++++++++++++--
 drivers/acpi/scan.c     |  7 +++++--
 include/acpi/acpi_bus.h |  3 ++-
 3 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c
index 204fe94c7e45..a194f30876c5 100644
--- a/drivers/acpi/glue.c
+++ b/drivers/acpi/glue.c
@@ -75,7 +75,8 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
 }
 
 #define FIND_CHILD_MIN_SCORE	1
-#define FIND_CHILD_MAX_SCORE	2
+#define FIND_CHILD_MID_SCORE	2
+#define FIND_CHILD_MAX_SCORE	3
 
 static int match_any(struct acpi_device *adev, void *not_used)
 {
@@ -96,8 +97,17 @@ static int find_child_checks(struct acpi_device *adev, bool check_children)
 		return -ENODEV;
 
 	status = acpi_evaluate_integer(adev->handle, "_STA", NULL, &sta);
-	if (status == AE_NOT_FOUND)
+	if (status == AE_NOT_FOUND) {
+		/*
+		 * Special case: backlight device objects without _STA are
+		 * preferred to other objects with the same _ADR value, because
+		 * it is more likely that they are actually useful.
+		 */
+		if (adev->pnp.type.backlight)
+			return FIND_CHILD_MID_SCORE;
+
 		return FIND_CHILD_MIN_SCORE;
+	}
 
 	if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
 		return -ENODEV;
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 274344434282..0c6f06abe3f4 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -1370,9 +1370,12 @@ static void acpi_set_pnp_ids(acpi_handle handle, struct acpi_device_pnp *pnp,
 		 * Some devices don't reliably have _HIDs & _CIDs, so add
 		 * synthetic HIDs to make sure drivers can find them.
 		 */
-		if (acpi_is_video_device(handle))
+		if (acpi_is_video_device(handle)) {
 			acpi_add_id(pnp, ACPI_VIDEO_HID);
-		else if (acpi_bay_match(handle))
+			pnp->type.backlight = 1;
+			break;
+		}
+		if (acpi_bay_match(handle))
 			acpi_add_id(pnp, ACPI_BAY_HID);
 		else if (acpi_dock_match(handle))
 			acpi_add_id(pnp, ACPI_DOCK_HID);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index cd3b75e08ec3..e44be31115a6 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -230,7 +230,8 @@ struct acpi_pnp_type {
 	u32 hardware_id:1;
 	u32 bus_address:1;
 	u32 platform_id:1;
-	u32 reserved:29;
+	u32 backlight:1;
+	u32 reserved:28;
 };
 
 struct acpi_device_pnp {
-- 
cgit v1.3.1


From ed058eab22d64c00663563e8e1e112989c65c59f Mon Sep 17 00:00:00 2001
From: Henning Schild <henning.schild@siemens.com>
Date: Thu, 22 Dec 2022 11:37:19 +0100
Subject: platform/x86: simatic-ipc: correct name of a model

What we called IPC427G should be renamed to BX-39A to be more in line
with the actual product name.

Signed-off-by: Henning Schild <henning.schild@siemens.com>
Link: https://lore.kernel.org/r/20221222103720.8546-2-henning.schild@siemens.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/simatic-ipc.c            | 2 +-
 include/linux/platform_data/x86/simatic-ipc.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c
index ca76076fc706..2ab1f8da32b0 100644
--- a/drivers/platform/x86/simatic-ipc.c
+++ b/drivers/platform/x86/simatic-ipc.c
@@ -46,7 +46,7 @@ static struct {
 	{SIMATIC_IPC_IPC427D, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_NONE},
 	{SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E},
-	{SIMATIC_IPC_IPC427G, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+	{SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
 };
 
 static int register_platform_devices(u32 station_id)
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index 632320ec8f08..a4a6cba412cb 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -32,7 +32,7 @@ enum simatic_ipc_station_ids {
 	SIMATIC_IPC_IPC477E = 0x00000A02,
 	SIMATIC_IPC_IPC127E = 0x00000D01,
 	SIMATIC_IPC_IPC227G = 0x00000F01,
-	SIMATIC_IPC_IPC427G = 0x00001001,
+	SIMATIC_IPC_IPCBX_39A = 0x00001001,
 };
 
 static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
-- 
cgit v1.3.1


From d348b1d761e358a4ba03fb34aa7e3dbd278db236 Mon Sep 17 00:00:00 2001
From: Henning Schild <henning.schild@siemens.com>
Date: Thu, 22 Dec 2022 11:37:20 +0100
Subject: platform/x86: simatic-ipc: add another model

Add IPC PX-39A support.

Signed-off-by: Henning Schild <henning.schild@siemens.com>
Link: https://lore.kernel.org/r/20221222103720.8546-3-henning.schild@siemens.com
Reviewed-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
---
 drivers/platform/x86/simatic-ipc.c            | 1 +
 include/linux/platform_data/x86/simatic-ipc.h | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include')

diff --git a/drivers/platform/x86/simatic-ipc.c b/drivers/platform/x86/simatic-ipc.c
index 2ab1f8da32b0..b3622419cd1a 100644
--- a/drivers/platform/x86/simatic-ipc.c
+++ b/drivers/platform/x86/simatic-ipc.c
@@ -47,6 +47,7 @@ static struct {
 	{SIMATIC_IPC_IPC427E, SIMATIC_IPC_DEVICE_427E, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPC477E, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_427E},
 	{SIMATIC_IPC_IPCBX_39A, SIMATIC_IPC_DEVICE_227G, SIMATIC_IPC_DEVICE_227G},
+	{SIMATIC_IPC_IPCPX_39A, SIMATIC_IPC_DEVICE_NONE, SIMATIC_IPC_DEVICE_227G},
 };
 
 static int register_platform_devices(u32 station_id)
diff --git a/include/linux/platform_data/x86/simatic-ipc.h b/include/linux/platform_data/x86/simatic-ipc.h
index a4a6cba412cb..a48bb5240977 100644
--- a/include/linux/platform_data/x86/simatic-ipc.h
+++ b/include/linux/platform_data/x86/simatic-ipc.h
@@ -33,6 +33,7 @@ enum simatic_ipc_station_ids {
 	SIMATIC_IPC_IPC127E = 0x00000D01,
 	SIMATIC_IPC_IPC227G = 0x00000F01,
 	SIMATIC_IPC_IPCBX_39A = 0x00001001,
+	SIMATIC_IPC_IPCPX_39A = 0x00001002,
 };
 
 static inline u32 simatic_ipc_get_station_id(u8 *data, int max_len)
-- 
cgit v1.3.1


From d3f450533bbcb6dd4d7d59cadc9b61b7321e4ac1 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ardb@kernel.org>
Date: Mon, 9 Jan 2023 10:44:31 +0100
Subject: efi: tpm: Avoid READ_ONCE() for accessing the event log

Nathan reports that recent kernels built with LTO will crash when doing
EFI boot using Fedora's GRUB and SHIM. The culprit turns out to be a
misaligned load from the TPM event log, which is annotated with
READ_ONCE(), and under LTO, this gets translated into a LDAR instruction
which does not tolerate misaligned accesses.

Interestingly, this does not happen when booting the same kernel
straight from the UEFI shell, and so the fact that the event log may
appear misaligned in memory may be caused by a bug in GRUB or SHIM.

However, using READ_ONCE() to access firmware tables is slightly unusual
in any case, and here, we only need to ensure that 'event' is not
dereferenced again after it gets unmapped, but this is already taken
care of by the implicit barrier() semantics of the early_memunmap()
call.

Cc: <stable@vger.kernel.org>
Cc: Peter Jones <pjones@redhat.com>
Cc: Jarkko Sakkinen <jarkko@kernel.org>
Cc: Matthew Garrett <mjg59@srcf.ucam.org>
Reported-by: Nathan Chancellor <nathan@kernel.org>
Tested-by: Nathan Chancellor <nathan@kernel.org>
Link: https://github.com/ClangBuiltLinux/linux/issues/1782
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 include/linux/tpm_eventlog.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
index 20c0ff54b7a0..7d68a5cc5881 100644
--- a/include/linux/tpm_eventlog.h
+++ b/include/linux/tpm_eventlog.h
@@ -198,8 +198,8 @@ static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *ev
 	 * The loop below will unmap these fields if the log is larger than
 	 * one page, so save them here for reference:
 	 */
-	count = READ_ONCE(event->count);
-	event_type = READ_ONCE(event->event_type);
+	count = event->count;
+	event_type = event->event_type;
 
 	/* Verify that it's the log header */
 	if (event_header->pcr_idx != 0 ||
-- 
cgit v1.3.1