summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2022-12-29 11:31:45 -0700
committerJens Axboe <axboe@kernel.dk>2022-12-29 11:31:45 -0700
commit1551ed5a178ca030adc92b1eb29157b5e92bf134 (patch)
treeab215282cc41ee3edaf820f95620192927c7d69c
parent88d356ca41ba1c3effc2d4208dfbd4392f58cd6d (diff)
parent76807fcd73b818eb9f245ef1035aed34ecdd9813 (diff)
Merge tag 'nvme-6.2-2022-12-29' of git://git.infradead.org/nvme into block-6.2
Pull NVMe fixes from Christoph: "nvme fixes for Linux 6.2 - fix various problems in handling the Command Supported and Effects log (Christoph Hellwig) - don't allow unprivileged passthrough of commands that don't transfer data but modify logical block content (Christoph Hellwig) - add a features and quirks policy document (Christoph Hellwig) - fix some really nasty code that was correct but made smatch complain (Sagi Grimberg)" * tag 'nvme-6.2-2022-12-29' of git://git.infradead.org/nvme: nvme-auth: fix smatch warning complaints nvme: consult the CSE log page for unprivileged passthrough nvme: also return I/O command effects from nvme_command_effects nvmet: don't defer passthrough commands with trivial effects to the workqueue nvmet: set the LBCC bit for commands that modify data nvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it nvme: fix the NVME_CMD_EFFECTS_CSE_MASK definition docs, nvme: add a feature and quirk policy document
-rw-r--r--Documentation/maintainer/maintainer-entry-profile.rst1
-rw-r--r--Documentation/nvme/feature-and-quirk-policy.rst77
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/nvme/host/auth.c2
-rw-r--r--drivers/nvme/host/core.c32
-rw-r--r--drivers/nvme/host/ioctl.c28
-rw-r--r--drivers/nvme/target/admin-cmd.c37
-rw-r--r--drivers/nvme/target/passthru.c11
-rw-r--r--include/linux/nvme.h4
9 files changed, 159 insertions, 34 deletions
diff --git a/Documentation/maintainer/maintainer-entry-profile.rst b/Documentation/maintainer/maintainer-entry-profile.rst
index 93b2ae6c34a9..cfd37f31077f 100644
--- a/Documentation/maintainer/maintainer-entry-profile.rst
+++ b/Documentation/maintainer/maintainer-entry-profile.rst
@@ -104,3 +104,4 @@ to do something different in the near future.
../riscv/patch-acceptance
../driver-api/media/maintainer-entry-profile
../driver-api/vfio-pci-device-specific-driver-acceptance
+ ../nvme/feature-and-quirk-policy
diff --git a/Documentation/nvme/feature-and-quirk-policy.rst b/Documentation/nvme/feature-and-quirk-policy.rst
new file mode 100644
index 000000000000..c01d836d8e41
--- /dev/null
+++ b/Documentation/nvme/feature-and-quirk-policy.rst
@@ -0,0 +1,77 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+Linux NVMe feature and quirk policy
+=======================================
+
+This file explains the policy used to decide what is supported by the
+Linux NVMe driver and what is not.
+
+
+Introduction
+============
+
+NVM Express is an open collection of standards and information.
+
+The Linux NVMe host driver in drivers/nvme/host/ supports devices
+implementing the NVM Express (NVMe) family of specifications, which
+currently consists of a number of documents:
+
+ - the NVMe Base specification
+ - various Command Set specifications (e.g. NVM Command Set)
+ - various Transport specifications (e.g. PCIe, Fibre Channel, RDMA, TCP)
+ - the NVMe Management Interface specification
+
+See https://nvmexpress.org/developers/ for the NVMe specifications.
+
+
+Supported features
+==================
+
+NVMe is a large suite of specifications, and contains features that are only
+useful or suitable for specific use-cases. It is important to note that Linux
+does not aim to implement every feature in the specification. Every additional
+feature implemented introduces more code, more maintenance and potentially more
+bugs. Hence there is an inherent tradeoff between functionality and
+maintainability of the NVMe host driver.
+
+Any feature implemented in the Linux NVMe host driver must support the
+following requirements:
+
+ 1. The feature is specified in a release version of an official NVMe
+ specification, or in a ratified Technical Proposal (TP) that is
+ available on the NVMe website. Or, if it is not directly related to the
+ on-wire protocol, it does not contradict any of the NVMe specifications.
+ 2. Does not conflict with the Linux architecture, nor the design of the
+ NVMe host driver.
+ 3. Has a clear, indisputable value-proposition and a wide consensus across
+ the community.
+
+Vendor specific extensions are generally not supported in the NVMe host
+driver.
+
+It is strongly recommended to work with the Linux NVMe and block layer
+maintainers and get feedback on specification changes that are intended
+to be used by the Linux NVMe host driver in order to avoid conflict at a
+later stage.
+
+
+Quirks
+======
+
+Sometimes implementations of open standards fail to correctly implement parts
+of the standards. Linux uses identifier-based quirks to work around such
+implementation bugs. The intent of quirks is to deal with widely available
+hardware, usually consumer, which Linux users can't use without these quirks.
+Typically these implementations are not or only superficially tested with Linux
+by the hardware manufacturer.
+
+The Linux NVMe maintainers decide ad hoc whether to quirk implementations
+based on the impact of the problem to Linux users and how it impacts
+maintainability of the driver. In general quirks are a last resort, if no
+firmware updates or other workarounds are available from the vendor.
+
+Quirks will not be added to the Linux kernel for hardware that isn't available
+on the mass market. Hardware that fails qualification for enterprise Linux
+distributions, ChromeOS, Android or other consumers of the Linux kernel
+should be fixed before it is shipped instead of relying on Linux quirks.
diff --git a/MAINTAINERS b/MAINTAINERS
index bb77a3ed9d54..d53b3a6cdc67 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14827,6 +14827,7 @@ L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
T: git://git.infradead.org/nvme.git
+F: Documentation/nvme/
F: drivers/nvme/host/
F: drivers/nvme/common/
F: include/linux/nvme*
diff --git a/drivers/nvme/host/auth.c b/drivers/nvme/host/auth.c
index bb0abbe4491c..4424f53a8a0a 100644
--- a/drivers/nvme/host/auth.c
+++ b/drivers/nvme/host/auth.c
@@ -953,7 +953,7 @@ int nvme_auth_init_ctrl(struct nvme_ctrl *ctrl)
goto err_free_dhchap_secret;
if (!ctrl->opts->dhchap_secret && !ctrl->opts->dhchap_ctrl_secret)
- return ret;
+ return 0;
ctrl->dhchap_ctxs = kvcalloc(ctrl_max_dhchaps(ctrl),
sizeof(*chap), GFP_KERNEL);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cda1361e6d4f..d307ae4d8a57 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1074,6 +1074,18 @@ static u32 nvme_known_admin_effects(u8 opcode)
return 0;
}
+static u32 nvme_known_nvm_effects(u8 opcode)
+{
+ switch (opcode) {
+ case nvme_cmd_write:
+ case nvme_cmd_write_zeroes:
+ case nvme_cmd_write_uncor:
+ return NVME_CMD_EFFECTS_LBCC;
+ default:
+ return 0;
+ }
+}
+
u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
{
u32 effects = 0;
@@ -1081,16 +1093,24 @@ u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
if (ns) {
if (ns->head->effects)
effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
+ if (ns->head->ids.csi == NVME_CAP_CSS_NVM)
+ effects |= nvme_known_nvm_effects(opcode);
if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
dev_warn_once(ctrl->device,
- "IO command:%02x has unhandled effects:%08x\n",
+ "IO command:%02x has unusual effects:%08x\n",
opcode, effects);
- return 0;
- }
- if (ctrl->effects)
- effects = le32_to_cpu(ctrl->effects->acs[opcode]);
- effects |= nvme_known_admin_effects(opcode);
+ /*
+ * NVME_CMD_EFFECTS_CSE_MASK causes a freeze of all I/O queues,
+ * which would deadlock when done on an I/O command. Note that
+ * we already warn about an unusual effect above.
+ */
+ effects &= ~NVME_CMD_EFFECTS_CSE_MASK;
+ } else {
+ if (ctrl->effects)
+ effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+ effects |= nvme_known_admin_effects(opcode);
+ }
return effects;
}
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 9ddda571f046..a8639919237e 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -11,6 +11,8 @@
static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
fmode_t mode)
{
+ u32 effects;
+
if (capable(CAP_SYS_ADMIN))
return true;
@@ -43,11 +45,29 @@ static bool nvme_cmd_allowed(struct nvme_ns *ns, struct nvme_command *c,
}
/*
- * Only allow I/O commands that transfer data to the controller if the
- * special file is open for writing, but always allow I/O commands that
- * transfer data from the controller.
+ * Check if the controller provides a Commands Supported and Effects log
+ * and marks this command as supported. If not, reject unprivileged
+ * passthrough.
+ */
+ effects = nvme_command_effects(ns->ctrl, ns, c->common.opcode);
+ if (!(effects & NVME_CMD_EFFECTS_CSUPP))
+ return false;
+
+ /*
+ * Don't allow passthrough for commands that have intrusive (or unknown)
+ * effects.
+ */
+ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+ NVME_CMD_EFFECTS_UUID_SEL |
+ NVME_CMD_EFFECTS_SCOPE_MASK))
+ return false;
+
+ /*
+ * Only allow I/O commands that transfer data to the controller or that
+ * change the logical block contents if the file descriptor is open for
+ * writing.
*/
- if (nvme_is_write(c))
+ if (nvme_is_write(c) || (effects & NVME_CMD_EFFECTS_LBCC))
return mode & FMODE_WRITE;
return true;
}
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 53a004ea320c..6a54ed6fb121 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -164,26 +164,31 @@ out:
static void nvmet_get_cmd_effects_nvm(struct nvme_effects_log *log)
{
- log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0);
- log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0);
-
- log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0);
+ log->acs[nvme_admin_get_log_page] =
+ log->acs[nvme_admin_identify] =
+ log->acs[nvme_admin_abort_cmd] =
+ log->acs[nvme_admin_set_features] =
+ log->acs[nvme_admin_get_features] =
+ log->acs[nvme_admin_async_event] =
+ log->acs[nvme_admin_keep_alive] =
+ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+
+ log->iocs[nvme_cmd_read] =
+ log->iocs[nvme_cmd_flush] =
+ log->iocs[nvme_cmd_dsm] =
+ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
+ log->iocs[nvme_cmd_write] =
+ log->iocs[nvme_cmd_write_zeroes] =
+ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC);
}
static void nvmet_get_cmd_effects_zns(struct nvme_effects_log *log)
{
- log->iocs[nvme_cmd_zone_append] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_zone_mgmt_send] = cpu_to_le32(1 << 0);
- log->iocs[nvme_cmd_zone_mgmt_recv] = cpu_to_le32(1 << 0);
+ log->iocs[nvme_cmd_zone_append] =
+ log->iocs[nvme_cmd_zone_mgmt_send] =
+ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC);
+ log->iocs[nvme_cmd_zone_mgmt_recv] =
+ cpu_to_le32(NVME_CMD_EFFECTS_CSUPP);
}
static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req)
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c
index 79af5140af8b..adc0958755d6 100644
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -334,14 +334,13 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
}
/*
- * If there are effects for the command we are about to execute, or
- * an end_req function we need to use nvme_execute_passthru_rq()
- * synchronously in a work item seeing the end_req function and
- * nvme_passthru_end() can't be called in the request done callback
- * which is typically in interrupt context.
+ * If a command needs post-execution fixups, or there are any
+ * non-trivial effects, make sure to execute the command synchronously
+ * in a workqueue so that nvme_passthru_end gets called.
*/
effects = nvme_command_effects(ctrl, ns, req->cmd->common.opcode);
- if (req->p.use_workqueue || effects) {
+ if (req->p.use_workqueue ||
+ (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))) {
INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work);
req->p.rq = rq;
queue_work(nvmet_wq, &req->p.work);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index d6be2a686100..4fad4aa245fb 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -7,6 +7,7 @@
#ifndef _LINUX_NVME_H
#define _LINUX_NVME_H
+#include <linux/bits.h>
#include <linux/types.h>
#include <linux/uuid.h>
@@ -639,8 +640,9 @@ enum {
NVME_CMD_EFFECTS_NCC = 1 << 2,
NVME_CMD_EFFECTS_NIC = 1 << 3,
NVME_CMD_EFFECTS_CCC = 1 << 4,
- NVME_CMD_EFFECTS_CSE_MASK = 3 << 16,
+ NVME_CMD_EFFECTS_CSE_MASK = GENMASK(18, 16),
NVME_CMD_EFFECTS_UUID_SEL = 1 << 19,
+ NVME_CMD_EFFECTS_SCOPE_MASK = GENMASK(31, 20),
};
struct nvme_effects_log {