From 0b3f9c757cabad4b8101c5fcddddd029ed5506a6 Mon Sep 17 00:00:00 2001 From: Daniel Axtens Date: Fri, 14 Aug 2015 17:41:18 +1000 Subject: cxl: Drop commands if the PCI channel is not in normal state If the PCI channel has gone down, don't attempt to poke the hardware. We need to guard every time cxl_whatever_(read|write) is called. This is because a call to those functions will dereference an offset into an mmio register, and the mmio mappings get invalidated in the EEH teardown. Check in the read/write functions in the header. We give them the same semantics as usual PCI operations: - a write to a channel that is down is ignored. - a read from a channel that is down returns all fs. Also, we try to access the MMIO space of a vPHB device as part of the PCI disable path. Because that's a read that bypasses most of our usual checks, we handle it explicitly. As far as user visible warnings go: - Check link state in file ops, return -EIO if down. - Be reasonably quiet if there's an error in a teardown path, or when we already know the hardware is going down. - Throw a big WARN if someone tries to start a CXL operation while the card is down. This gives a useful stacktrace for debugging whatever is doing that. Signed-off-by: Daniel Axtens Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/misc/cxl/context.c') diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 1287148629c0..615842115848 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -193,7 +193,11 @@ int __detach_context(struct cxl_context *ctx) if (status != STARTED) return -EBUSY; - WARN_ON(cxl_detach_process(ctx)); + /* Only warn if we detached while the link was OK. + * If detach fails when hw is down, we don't care. + */ + WARN_ON(cxl_detach_process(ctx) && + cxl_adapter_link_ok(ctx->afu->adapter)); flush_work(&ctx->fault_work); /* Only needed for dedicated process */ put_pid(ctx->pid); cxl_ctx_put(); -- cgit v1.2.3-70-g09d2 From d9232a3da8683cd9c9854a858bcca968fe5f3bca Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 23 Jul 2015 16:43:56 +1000 Subject: cxl: Add alternate MMIO error handling userspace programs using cxl currently have to use two strategies for dealing with MMIO errors simultaneously. They have to check every read for a return of all Fs in case the adapter has gone away and the kernel has not yet noticed, and they have to deal with SIGBUS in case the kernel has already noticed, invalidated the mapping and marked the context as failed. In order to simplify things, this patch adds an alternative approach where the kernel will return a page filled with Fs instead of delivering a SIGBUS. This allows userspace to only need to deal with one of these two error paths, and is intended for use in libraries that use cxl transparently and may not be able to safely install a signal handler. This approach will only work if certain constraints are met. Namely, if the application is both reading and writing to an address in the problem state area it cannot assume that a non-FF read is OK, as it may just be reading out a value it has previously written. Further - since only one page is used per context a write to a given offset would be visible when reading the same offset from a different page in the mapping (this only applies within a single context, not between contexts). An application could deal with this by e.g. making sure it also reads from a read-only offset after any reads to a read/write offset. Due to these constraints, this functionality must be explicitly requested by userspace when starting the context by passing in the CXL_START_WORK_ERR_FF flag. Signed-off-by: Ian Munsie Acked-by: Michael Neuling Signed-off-by: Michael Ellerman --- drivers/misc/cxl/context.c | 14 ++++++++++++++ drivers/misc/cxl/cxl.h | 4 +++- drivers/misc/cxl/file.c | 4 +++- include/uapi/misc/cxl.h | 4 +++- 4 files changed, 23 insertions(+), 3 deletions(-) (limited to 'drivers/misc/cxl/context.c') diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 615842115848..941fda04aa9a 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -126,6 +126,18 @@ static int cxl_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) if (ctx->status != STARTED) { mutex_unlock(&ctx->status_mutex); pr_devel("%s: Context not started, failing problem state access\n", __func__); + if (ctx->mmio_err_ff) { + if (!ctx->ff_page) { + ctx->ff_page = alloc_page(GFP_USER); + if (!ctx->ff_page) + return VM_FAULT_OOM; + memset(page_address(ctx->ff_page), 0xff, PAGE_SIZE); + } + get_page(ctx->ff_page); + vmf->page = ctx->ff_page; + vma->vm_page_prot = pgprot_cached(vma->vm_page_prot); + return 0; + } return VM_FAULT_SIGBUS; } @@ -257,6 +269,8 @@ static void reclaim_ctx(struct rcu_head *rcu) struct cxl_context *ctx = container_of(rcu, struct cxl_context, rcu); free_page((u64)ctx->sstp); + if (ctx->ff_page) + __free_page(ctx->ff_page); ctx->sstp = NULL; kfree(ctx); diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 6f5386653dae..e7af256f60c5 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -34,7 +34,7 @@ extern uint cxl_verbose; * Bump version each time a user API change is made, whether it is * backwards compatible ot not. */ -#define CXL_API_VERSION 1 +#define CXL_API_VERSION 2 #define CXL_API_VERSION_COMPATIBLE 1 /* @@ -418,6 +418,8 @@ struct cxl_context { /* Used to unmap any mmaps when force detaching */ struct address_space *mapping; struct mutex mapping_lock; + struct page *ff_page; + bool mmio_err_ff; spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 57bdb473749f..a30bf285b5bd 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -184,6 +184,8 @@ static long afu_ioctl_start_work(struct cxl_context *ctx, if (work.flags & CXL_START_WORK_AMR) amr = work.amr & mfspr(SPRN_UAMOR); + ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF); + /* * We grab the PID here and not in the file open to allow for the case * where a process (master, some daemon, etc) has opened the chardev on @@ -538,7 +540,7 @@ int __init cxl_file_init(void) * If these change we really need to update API. Either change some * flags or update API version number CXL_API_VERSION. */ - BUILD_BUG_ON(CXL_API_VERSION != 1); + BUILD_BUG_ON(CXL_API_VERSION != 2); BUILD_BUG_ON(sizeof(struct cxl_ioctl_start_work) != 64); BUILD_BUG_ON(sizeof(struct cxl_event_header) != 8); BUILD_BUG_ON(sizeof(struct cxl_event_afu_interrupt) != 8); diff --git a/include/uapi/misc/cxl.h b/include/uapi/misc/cxl.h index 99a8ca15fe64..1e889aa8a36e 100644 --- a/include/uapi/misc/cxl.h +++ b/include/uapi/misc/cxl.h @@ -29,8 +29,10 @@ struct cxl_ioctl_start_work { #define CXL_START_WORK_AMR 0x0000000000000001ULL #define CXL_START_WORK_NUM_IRQS 0x0000000000000002ULL +#define CXL_START_WORK_ERR_FF 0x0000000000000004ULL #define CXL_START_WORK_ALL (CXL_START_WORK_AMR |\ - CXL_START_WORK_NUM_IRQS) + CXL_START_WORK_NUM_IRQS |\ + CXL_START_WORK_ERR_FF) /* Possible modes that an afu can be in */ -- cgit v1.2.3-70-g09d2 From 55e07668fbba9466e6a9ef7650718356cda38406 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Thu, 27 Aug 2015 19:50:19 +1000 Subject: cxl: Fix force unmapping mmaps of contexts allocated through the kernel api The cxl user api uses the address_space associated with the file when we need to force unmap all cxl mmap regions (e.g. on eeh, driver detach, etc). Currently, contexts allocated through the kernel api do not do this and instead skip the mmap invalidation, potentially allowing them to poke at the hardware after such an event, which may cause all sorts of trouble. This patch allocates an address_space for cxl contexts allocated through the kernel api so that the same invalidate path will for these contexts as well. We don't use the anonymous inode's address_space, as doing so could invalidate any mmaps of completely unrelated drivers using anonymous file descriptors. This patch also introduces a kernelapi flag, so we know when freeing the context if the address_space was allocated by us and needs to be freed. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/api.c | 33 ++++++++++++++++++++++++++++++--- drivers/misc/cxl/context.c | 2 ++ drivers/misc/cxl/cxl.h | 1 + 3 files changed, 33 insertions(+), 3 deletions(-) (limited to 'drivers/misc/cxl/context.c') diff --git a/drivers/misc/cxl/api.c b/drivers/misc/cxl/api.c index 005adc7d33a2..8af12c884b04 100644 --- a/drivers/misc/cxl/api.c +++ b/drivers/misc/cxl/api.c @@ -12,11 +12,13 @@ #include #include #include +#include #include "cxl.h" struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) { + struct address_space *mapping; struct cxl_afu *afu; struct cxl_context *ctx; int rc; @@ -30,14 +32,32 @@ struct cxl_context *cxl_dev_context_init(struct pci_dev *dev) goto err_dev; } + ctx->kernelapi = true; + + /* + * Make our own address space since we won't have one from the + * filesystem like the user api has, and even if we do associate a file + * with this context we don't want to use the global anonymous inode's + * address space as that can invalidate unrelated users: + */ + mapping = kmalloc(sizeof(struct address_space), GFP_KERNEL); + if (!mapping) { + rc = -ENOMEM; + goto err_ctx; + } + address_space_init_once(mapping); + /* Make it a slave context. We can promote it later? */ - rc = cxl_context_init(ctx, afu, false, NULL); + rc = cxl_context_init(ctx, afu, false, mapping); if (rc) - goto err_ctx; + goto err_mapping; + cxl_assign_psn_space(ctx); return ctx; +err_mapping: + kfree(mapping); err_ctx: kfree(ctx); err_dev: @@ -260,9 +280,16 @@ struct file *cxl_get_fd(struct cxl_context *ctx, struct file_operations *fops, file = anon_inode_getfile("cxl", fops, ctx, flags); if (IS_ERR(file)) - put_unused_fd(fdtmp); + goto err_fd; + + file->f_mapping = ctx->mapping; + *fd = fdtmp; return file; + +err_fd: + put_unused_fd(fdtmp); + return NULL; } EXPORT_SYMBOL_GPL(cxl_get_fd); diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c index 941fda04aa9a..e762f85ee233 100644 --- a/drivers/misc/cxl/context.c +++ b/drivers/misc/cxl/context.c @@ -272,6 +272,8 @@ static void reclaim_ctx(struct rcu_head *rcu) if (ctx->ff_page) __free_page(ctx->ff_page); ctx->sstp = NULL; + if (ctx->kernelapi) + kfree(ctx->mapping); kfree(ctx); } diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index e7af256f60c5..d6566c60bcb0 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -420,6 +420,7 @@ struct cxl_context { struct mutex mapping_lock; struct page *ff_page; bool mmio_err_ff; + bool kernelapi; spinlock_t sste_lock; /* Protects segment table entries */ struct cxl_sste *sstp; -- cgit v1.2.3-70-g09d2