summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/acpi/nfit.c798
-rw-r--r--drivers/acpi/nfit.h30
-rw-r--r--drivers/nvdimm/blk.c18
-rw-r--r--drivers/nvdimm/btt.c19
-rw-r--r--drivers/nvdimm/bus.c131
-rw-r--r--drivers/nvdimm/core.c110
-rw-r--r--drivers/nvdimm/dimm_devs.c6
-rw-r--r--drivers/nvdimm/namespace_devs.c7
-rw-r--r--drivers/nvdimm/nd.h4
-rw-r--r--drivers/nvdimm/pfn.h23
-rw-r--r--drivers/nvdimm/pfn_devs.c61
-rw-r--r--drivers/nvdimm/pmem.c219
-rw-r--r--drivers/nvdimm/region.c12
13 files changed, 1071 insertions, 367 deletions
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 35947ac87644..d0f35e63640b 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -21,6 +21,7 @@
#include <linux/sort.h>
#include <linux/pmem.h>
#include <linux/io.h>
+#include <linux/nd.h>
#include <asm/cacheflush.h>
#include "nfit.h"
@@ -34,6 +35,18 @@ static bool force_enable_dimms;
module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
+module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
+
+/* after three payloads of overflow, it's dead jim */
+static unsigned int scrub_overflow_abort = 3;
+module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(scrub_overflow_abort,
+ "Number of times we overflow ARS results before abort");
+
+static struct workqueue_struct *nfit_wq;
+
struct nfit_table_prev {
struct list_head spas;
struct list_head memdevs;
@@ -72,9 +85,90 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
return to_acpi_device(acpi_desc->dev);
}
+static int xlat_status(void *buf, unsigned int cmd)
+{
+ struct nd_cmd_clear_error *clear_err;
+ struct nd_cmd_ars_status *ars_status;
+ struct nd_cmd_ars_start *ars_start;
+ struct nd_cmd_ars_cap *ars_cap;
+ u16 flags;
+
+ switch (cmd) {
+ case ND_CMD_ARS_CAP:
+ ars_cap = buf;
+ if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE)
+ return -ENOTTY;
+
+ /* Command failed */
+ if (ars_cap->status & 0xffff)
+ return -EIO;
+
+ /* No supported scan types for this range */
+ flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE;
+ if ((ars_cap->status >> 16 & flags) == 0)
+ return -ENOTTY;
+ break;
+ case ND_CMD_ARS_START:
+ ars_start = buf;
+ /* ARS is in progress */
+ if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY)
+ return -EBUSY;
+
+ /* Command failed */
+ if (ars_start->status & 0xffff)
+ return -EIO;
+ break;
+ case ND_CMD_ARS_STATUS:
+ ars_status = buf;
+ /* Command failed */
+ if (ars_status->status & 0xffff)
+ return -EIO;
+ /* Check extended status (Upper two bytes) */
+ if (ars_status->status == NFIT_ARS_STATUS_DONE)
+ return 0;
+
+ /* ARS is in progress */
+ if (ars_status->status == NFIT_ARS_STATUS_BUSY)
+ return -EBUSY;
+
+ /* No ARS performed for the current boot */
+ if (ars_status->status == NFIT_ARS_STATUS_NONE)
+ return -EAGAIN;
+
+ /*
+ * ARS interrupted, either we overflowed or some other
+ * agent wants the scan to stop. If we didn't overflow
+ * then just continue with the returned results.
+ */
+ if (ars_status->status == NFIT_ARS_STATUS_INTR) {
+ if (ars_status->flags & NFIT_ARS_F_OVERFLOW)
+ return -ENOSPC;
+ return 0;
+ }
+
+ /* Unknown status */
+ if (ars_status->status >> 16)
+ return -EIO;
+ break;
+ case ND_CMD_CLEAR_ERROR:
+ clear_err = buf;
+ if (clear_err->status & 0xffff)
+ return -EIO;
+ if (!clear_err->cleared)
+ return -EIO;
+ if (clear_err->length > clear_err->cleared)
+ return clear_err->cleared;
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
- unsigned int buf_len)
+ unsigned int buf_len, int *cmd_rc)
{
struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
const struct nd_cmd_desc *desc = NULL;
@@ -185,6 +279,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
* unfilled in the output buffer
*/
rc = buf_len - offset - in_buf.buffer.length;
+ if (cmd_rc)
+ *cmd_rc = xlat_status(buf, cmd);
} else {
dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
__func__, dimm_name, cmd_name, buf_len,
@@ -675,12 +771,11 @@ static struct attribute_group acpi_nfit_attribute_group = {
.attrs = acpi_nfit_attributes,
};
-const struct attribute_group *acpi_nfit_attribute_groups[] = {
+static const struct attribute_group *acpi_nfit_attribute_groups[] = {
&nvdimm_bus_attribute_group,
&acpi_nfit_attribute_group,
NULL,
};
-EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups);
static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev)
{
@@ -917,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
if (!adev)
return;
- for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++)
+ for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++)
if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
set_bit(i, &nd_desc->dsm_mask);
}
@@ -1105,7 +1200,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw,
writeq(cmd, mmio->addr.base + offset);
wmb_blk(nfit_blk);
- if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH)
+ if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH)
readq(mmio->addr.base + offset);
}
@@ -1141,7 +1236,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
memcpy_to_pmem(mmio->addr.aperture + offset,
iobuf + copied, c);
else {
- if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH)
+ if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
mmio_flush_range((void __force *)
mmio->addr.aperture + offset, c);
@@ -1328,13 +1423,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc,
memset(&flags, 0, sizeof(flags));
rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags,
- sizeof(flags));
+ sizeof(flags), NULL);
if (rc >= 0 && flags.status == 0)
nfit_blk->dimm_flags = flags.flags;
else if (rc == -ENOTTY) {
/* fall back to a conservative default */
- nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH;
+ nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH;
rc = 0;
} else
rc = -ENXIO;
@@ -1473,93 +1568,85 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus,
/* devm will free nfit_blk */
}
-static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc,
- struct nd_cmd_ars_cap *cmd, u64 addr, u64 length)
+static int ars_get_cap(struct acpi_nfit_desc *acpi_desc,
+ struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa)
{
- cmd->address = addr;
- cmd->length = length;
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ int cmd_rc, rc;
- return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
- sizeof(*cmd));
+ cmd->address = spa->address;
+ cmd->length = spa->length;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd,
+ sizeof(*cmd), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
}
-static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc,
- struct nd_cmd_ars_start *cmd, u64 addr, u64 length)
+static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa)
{
int rc;
+ int cmd_rc;
+ struct nd_cmd_ars_start ars_start;
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- cmd->address = addr;
- cmd->length = length;
- cmd->type = ND_ARS_PERSISTENT;
+ memset(&ars_start, 0, sizeof(ars_start));
+ ars_start.address = spa->address;
+ ars_start.length = spa->length;
+ if (nfit_spa_type(spa) == NFIT_SPA_PM)
+ ars_start.type = ND_ARS_PERSISTENT;
+ else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
+ ars_start.type = ND_ARS_VOLATILE;
+ else
+ return -ENOTTY;
- while (1) {
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd,
- sizeof(*cmd));
- if (rc)
- return rc;
- switch (cmd->status) {
- case 0:
- return 0;
- case 1:
- /* ARS unsupported, but we should never get here */
- return 0;
- case 6:
- /* ARS is in progress */
- msleep(1000);
- break;
- default:
- return -ENXIO;
- }
- }
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+ sizeof(ars_start), &cmd_rc);
+
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
}
-static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc,
- struct nd_cmd_ars_status *cmd, u32 size)
+static int ars_continue(struct acpi_nfit_desc *acpi_desc)
{
- int rc;
+ int rc, cmd_rc;
+ struct nd_cmd_ars_start ars_start;
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+
+ memset(&ars_start, 0, sizeof(ars_start));
+ ars_start.address = ars_status->restart_address;
+ ars_start.length = ars_status->restart_length;
+ ars_start.type = ars_status->type;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start,
+ sizeof(ars_start), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
+}
- while (1) {
- rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd,
- size);
- if (rc || cmd->status & 0xffff)
- return -ENXIO;
+static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+ struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+ int rc, cmd_rc;
- /* Check extended status (Upper two bytes) */
- switch (cmd->status >> 16) {
- case 0:
- return 0;
- case 1:
- /* ARS is in progress */
- msleep(1000);
- break;
- case 2:
- /* No ARS performed for the current boot */
- return 0;
- case 3:
- /* TODO: error list overflow support */
- default:
- return -ENXIO;
- }
- }
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
+ acpi_desc->ars_status_size, &cmd_rc);
+ if (rc < 0)
+ return rc;
+ return cmd_rc;
}
static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
- struct nd_cmd_ars_status *ars_status, u64 start)
+ struct nd_cmd_ars_status *ars_status)
{
int rc;
u32 i;
- /*
- * The address field returned by ars_status should be either
- * less than or equal to the address we last started ARS for.
- * The (start, length) returned by ars_status should also have
- * non-zero overlap with the range we started ARS for.
- * If this is not the case, bail.
- */
- if (ars_status->address > start ||
- (ars_status->address + ars_status->length < start))
- return -ENXIO;
-
for (i = 0; i < ars_status->num_records; i++) {
rc = nvdimm_bus_add_poison(nvdimm_bus,
ars_status->records[i].err_address,
@@ -1571,118 +1658,56 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus,
return 0;
}
-static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc,
- struct nd_region_desc *ndr_desc)
+static void acpi_nfit_remove_resource(void *data)
{
- struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
- struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
- struct nd_cmd_ars_status *ars_status = NULL;
- struct nd_cmd_ars_start *ars_start = NULL;
- struct nd_cmd_ars_cap *ars_cap = NULL;
- u64 start, len, cur, remaining;
- u32 ars_status_size;
- int rc;
-
- ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL);
- if (!ars_cap)
- return -ENOMEM;
+ struct resource *res = data;
- start = ndr_desc->res->start;
- len = ndr_desc->res->end - ndr_desc->res->start + 1;
-
- /*
- * If ARS is unimplemented, unsupported, or if the 'Persistent Memory
- * Scrub' flag in extended status is not set, skip this but continue
- * initialization
- */
- rc = ars_get_cap(nd_desc, ars_cap, start, len);
- if (rc == -ENOTTY) {
- dev_dbg(acpi_desc->dev,
- "Address Range Scrub is not implemented, won't create an error list\n");
- rc = 0;
- goto out;
- }
- if (rc)
- goto out;
-
- if ((ars_cap->status & 0xffff) ||
- !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) {
- dev_warn(acpi_desc->dev,
- "ARS unsupported (status: 0x%x), won't create an error list\n",
- ars_cap->status);
- goto out;
- }
-
- /*
- * Check if a full-range ARS has been run. If so, use those results
- * without having to start a new ARS.
- */
- ars_status_size = ars_cap->max_ars_out;
- ars_status = kzalloc(ars_status_size, GFP_KERNEL);
- if (!ars_status) {
- rc = -ENOMEM;
- goto out;
- }
+ remove_resource(res);
+}
- rc = ars_get_status(nd_desc, ars_status, ars_status_size);
- if (rc)
- goto out;
+static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc,
+ struct nd_region_desc *ndr_desc)
+{
+ struct resource *res, *nd_res = ndr_desc->res;
+ int is_pmem, ret;
- if (ars_status->address <= start &&
- (ars_status->address + ars_status->length >= start + len)) {
- rc = ars_status_process_records(nvdimm_bus, ars_status, start);
- goto out;
- }
+ /* No operation if the region is already registered as PMEM */
+ is_pmem = region_intersects(nd_res->start, resource_size(nd_res),
+ IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY);
+ if (is_pmem == REGION_INTERSECTS)
+ return 0;
- /*
- * ARS_STATUS can overflow if the number of poison entries found is
- * greater than the maximum buffer size (ars_cap->max_ars_out)
- * To detect overflow, check if the length field of ars_status
- * is less than the length we supplied. If so, process the
- * error entries we got, adjust the start point, and start again
- */
- ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL);
- if (!ars_start)
+ res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL);
+ if (!res)
return -ENOMEM;
- cur = start;
- remaining = len;
- do {
- u64 done, end;
-
- rc = ars_do_start(nd_desc, ars_start, cur, remaining);
- if (rc)
- goto out;
-
- rc = ars_get_status(nd_desc, ars_status, ars_status_size);
- if (rc)
- goto out;
+ res->name = "Persistent Memory";
+ res->start = nd_res->start;
+ res->end = nd_res->end;
+ res->flags = IORESOURCE_MEM;
+ res->desc = IORES_DESC_PERSISTENT_MEMORY;
- rc = ars_status_process_records(nvdimm_bus, ars_status, cur);
- if (rc)
- goto out;
+ ret = insert_resource(&iomem_resource, res);
+ if (ret)
+ return ret;
- end = min(cur + remaining,
- ars_status->address + ars_status->length);
- done = end - cur;
- cur += done;
- remaining -= done;
- } while (remaining);
+ ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res);
+ if (ret) {
+ remove_resource(res);
+ return ret;
+ }
- out:
- kfree(ars_cap);
- kfree(ars_start);
- kfree(ars_status);
- return rc;
+ return 0;
}
static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc,
struct acpi_nfit_memory_map *memdev,
- struct acpi_nfit_system_address *spa)
+ struct nfit_spa *nfit_spa)
{
struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc,
memdev->device_handle);
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
struct nd_blk_region_desc *ndbr_desc;
struct nfit_mem *nfit_mem;
int blk_valid = 0;
@@ -1718,7 +1743,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
ndbr_desc->enable = acpi_nfit_blk_region_enable;
ndbr_desc->disable = acpi_nfit_blk_region_disable;
ndbr_desc->do_io = acpi_desc->blk_do_io;
- if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc))
+ nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
return -ENOMEM;
break;
}
@@ -1738,7 +1765,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
struct resource res;
int count = 0, rc;
- if (nfit_spa->is_registered)
+ if (nfit_spa->nd_region)
return 0;
if (spa->range_index == 0) {
@@ -1775,47 +1802,332 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
}
nd_mapping = &nd_mappings[count++];
rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc,
- memdev, spa);
+ memdev, nfit_spa);
if (rc)
- return rc;
+ goto out;
}
ndr_desc->nd_mapping = nd_mappings;
ndr_desc->num_mappings = count;
rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa);
if (rc)
- return rc;
+ goto out;
nvdimm_bus = acpi_desc->nvdimm_bus;
if (nfit_spa_type(spa) == NFIT_SPA_PM) {
- rc = acpi_nfit_find_poison(acpi_desc, ndr_desc);
+ rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc);
if (rc) {
- dev_err(acpi_desc->dev,
- "error while performing ARS to find poison: %d\n",
+ dev_warn(acpi_desc->dev,
+ "failed to insert pmem resource to iomem: %d\n",
rc);
- return rc;
+ goto out;
}
- if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc))
- return -ENOMEM;
+
+ nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
} else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) {
- if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc))
- return -ENOMEM;
+ nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus,
+ ndr_desc);
+ if (!nfit_spa->nd_region)
+ rc = -ENOMEM;
}
- nfit_spa->is_registered = 1;
+ out:
+ if (rc)
+ dev_err(acpi_desc->dev, "failed to register spa range %d\n",
+ nfit_spa->spa->range_index);
+ return rc;
+}
+
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
+ u32 max_ars)
+{
+ struct device *dev = acpi_desc->dev;
+ struct nd_cmd_ars_status *ars_status;
+
+ if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
+ memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+ return 0;
+ }
+
+ if (acpi_desc->ars_status)
+ devm_kfree(dev, acpi_desc->ars_status);
+ acpi_desc->ars_status = NULL;
+ ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+ if (!ars_status)
+ return -ENOMEM;
+ acpi_desc->ars_status = ars_status;
+ acpi_desc->ars_status_size = max_ars;
return 0;
}
-static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
{
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ int rc;
+
+ if (!nfit_spa->max_ars) {
+ struct nd_cmd_ars_cap ars_cap;
+
+ memset(&ars_cap, 0, sizeof(ars_cap));
+ rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+ if (rc < 0)
+ return rc;
+ nfit_spa->max_ars = ars_cap.max_ars_out;
+ nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+ /* check that the supported scrub types match the spa type */
+ if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
+ ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
+ return -ENOTTY;
+ else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
+ ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
+ return -ENOTTY;
+ }
+
+ if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+ return -ENOMEM;
+
+ rc = ars_get_status(acpi_desc);
+ if (rc < 0 && rc != -ENOSPC)
+ return rc;
+
+ if (ars_status_process_records(acpi_desc->nvdimm_bus,
+ acpi_desc->ars_status))
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
+ struct nfit_spa *nfit_spa)
+{
+ struct acpi_nfit_system_address *spa = nfit_spa->spa;
+ unsigned int overflow_retry = scrub_overflow_abort;
+ u64 init_ars_start = 0, init_ars_len = 0;
+ struct device *dev = acpi_desc->dev;
+ unsigned int tmo = scrub_timeout;
+ int rc;
+
+ if (nfit_spa->ars_done || !nfit_spa->nd_region)
+ return;
+
+ rc = ars_start(acpi_desc, nfit_spa);
+ /*
+ * If we timed out the initial scan we'll still be busy here,
+ * and will wait another timeout before giving up permanently.
+ */
+ if (rc < 0 && rc != -EBUSY)
+ return;
+
+ do {
+ u64 ars_start, ars_len;
+
+ if (acpi_desc->cancel)
+ break;
+ rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+ if (rc == -ENOTTY)
+ break;
+ if (rc == -EBUSY && !tmo) {
+ dev_warn(dev, "range %d ars timeout, aborting\n",
+ spa->range_index);
+ break;
+ }
+
+ if (rc == -EBUSY) {
+ /*
+ * Note, entries may be appended to the list
+ * while the lock is dropped, but the workqueue
+ * being active prevents entries being deleted /
+ * freed.
+ */
+ mutex_unlock(&acpi_desc->init_mutex);
+ ssleep(1);
+ tmo--;
+ mutex_lock(&acpi_desc->init_mutex);
+ continue;
+ }
+
+ /* we got some results, but there are more pending... */
+ if (rc == -ENOSPC && overflow_retry--) {
+ if (!init_ars_len) {
+ init_ars_len = acpi_desc->ars_status->length;
+ init_ars_start = acpi_desc->ars_status->address;
+ }
+ rc = ars_continue(acpi_desc);
+ }
+
+ if (rc < 0) {
+ dev_warn(dev, "range %d ars continuation failed\n",
+ spa->range_index);
+ break;
+ }
+
+ if (init_ars_len) {
+ ars_start = init_ars_start;
+ ars_len = init_ars_len;
+ } else {
+ ars_start = acpi_desc->ars_status->address;
+ ars_len = acpi_desc->ars_status->length;
+ }
+ dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
+ spa->range_index, ars_start, ars_len);
+ /* notify the region about new poison entries */
+ nvdimm_region_notify(nfit_spa->nd_region,
+ NVDIMM_REVALIDATE_POISON);
+ break;
+ } while (1);
+}
+
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+ struct device *dev;
+ u64 init_scrub_length = 0;
struct nfit_spa *nfit_spa;
+ u64 init_scrub_address = 0;
+ bool init_ars_done = false;
+ struct acpi_nfit_desc *acpi_desc;
+ unsigned int tmo = scrub_timeout;
+ unsigned int overflow_retry = scrub_overflow_abort;
+
+ acpi_desc = container_of(work, typeof(*acpi_desc), work);
+ dev = acpi_desc->dev;
+ /*
+ * We scrub in 2 phases. The first phase waits for any platform
+ * firmware initiated scrubs to complete and then we go search for the
+ * affected spa regions to mark them scanned. In the second phase we
+ * initiate a directed scrub for every range that was not scrubbed in
+ * phase 1.
+ */
+
+ /* process platform firmware initiated scrubs */
+ retry:
+ mutex_lock(&acpi_desc->init_mutex);
list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
- int rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+ struct nd_cmd_ars_status *ars_status;
+ struct acpi_nfit_system_address *spa;
+ u64 ars_start, ars_len;
+ int rc;
- if (rc)
- return rc;
+ if (acpi_desc->cancel)
+ break;
+
+ if (nfit_spa->nd_region)
+ continue;
+
+ if (init_ars_done) {
+ /*
+ * No need to re-query, we're now just
+ * reconciling all the ranges covered by the
+ * initial scrub
+ */
+ rc = 0;
+ } else
+ rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+
+ if (rc == -ENOTTY) {
+ /* no ars capability, just register spa and move on */
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ continue;
+ }
+
+ if (rc == -EBUSY && !tmo) {
+ /* fallthrough to directed scrub in phase 2 */
+ dev_warn(dev, "timeout awaiting ars results, continuing...\n");
+ break;
+ } else if (rc == -EBUSY) {
+ mutex_unlock(&acpi_desc->init_mutex);
+ ssleep(1);
+ tmo--;
+ goto retry;
+ }
+
+ /* we got some results, but there are more pending... */
+ if (rc == -ENOSPC && overflow_retry--) {
+ ars_status = acpi_desc->ars_status;
+ /*
+ * Record the original scrub range, so that we
+ * can recall all the ranges impacted by the
+ * initial scrub.
+ */
+ if (!init_scrub_length) {
+ init_scrub_length = ars_status->length;
+ init_scrub_address = ars_status->address;
+ }
+ rc = ars_continue(acpi_desc);
+ if (rc == 0) {
+ mutex_unlock(&acpi_desc->init_mutex);
+ goto retry;
+ }
+ }
+
+ if (rc < 0) {
+ /*
+ * Initial scrub failed, we'll give it one more
+ * try below...
+ */
+ break;
+ }
+
+ /* We got some final results, record completed ranges */
+ ars_status = acpi_desc->ars_status;
+ if (init_scrub_length) {
+ ars_start = init_scrub_address;
+ ars_len = ars_start + init_scrub_length;
+ } else {
+ ars_start = ars_status->address;
+ ars_len = ars_status->length;
+ }
+ spa = nfit_spa->spa;
+
+ if (!init_ars_done) {
+ init_ars_done = true;
+ dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
+ ars_start, ars_len);
+ }
+ if (ars_start <= spa->address && ars_start + ars_len
+ >= spa->address + spa->length)
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
}
+
+ /*
+ * For all the ranges not covered by an initial scrub we still
+ * want to see if there are errors, but it's ok to discover them
+ * asynchronously.
+ */
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+ /*
+ * Flag all the ranges that still need scrubbing, but
+ * register them now to make data available.
+ */
+ if (nfit_spa->nd_region)
+ nfit_spa->ars_done = 1;
+ else
+ acpi_nfit_register_region(acpi_desc, nfit_spa);
+ }
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ acpi_nfit_async_scrub(acpi_desc, nfit_spa);
+ mutex_unlock(&acpi_desc->init_mutex);
+}
+
+static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
+{
+ struct nfit_spa *nfit_spa;
+ int rc;
+
+ list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+ if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
+ /* BLK regions don't need to wait for ars results */
+ rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+ if (rc)
+ return rc;
+ }
+
+ queue_work(nfit_wq, &acpi_desc->work);
return 0;
}
@@ -1901,15 +2213,64 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
}
EXPORT_SYMBOL_GPL(acpi_nfit_init);
-static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev)
+struct acpi_nfit_flush_work {
+ struct work_struct work;
+ struct completion cmp;
+};
+
+static void flush_probe(struct work_struct *work)
{
- struct nvdimm_bus_descriptor *nd_desc;
- struct acpi_nfit_desc *acpi_desc;
- struct device *dev = &adev->dev;
+ struct acpi_nfit_flush_work *flush;
- acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
- if (!acpi_desc)
- return ERR_PTR(-ENOMEM);
+ flush = container_of(work, typeof(*flush), work);
+ complete(&flush->cmp);
+}
+
+static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+ struct device *dev = acpi_desc->dev;
+ struct acpi_nfit_flush_work flush;
+
+ /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+ device_lock(dev);
+ device_unlock(dev);
+
+ /*
+ * Scrub work could take 10s of seconds, userspace may give up so we
+ * need to be interruptible while waiting.
+ */
+ INIT_WORK_ONSTACK(&flush.work, flush_probe);
+ COMPLETION_INITIALIZER_ONSTACK(flush.cmp);
+ queue_work(nfit_wq, &flush.work);
+ return wait_for_completion_interruptible(&flush.cmp);
+}
+
+static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
+ struct nvdimm *nvdimm, unsigned int cmd)
+{
+ struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+
+ if (nvdimm)
+ return 0;
+ if (cmd != ND_CMD_ARS_START)
+ return 0;
+
+ /*
+ * The kernel and userspace may race to initiate a scrub, but
+ * the scrub thread is prepared to lose that initial race. It
+ * just needs guarantees that any ars it initiates are not
+ * interrupted by any intervening start reqeusts from userspace.
+ */
+ if (work_busy(&acpi_desc->work))
+ return -EBUSY;
+
+ return 0;
+}
+
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
+{
+ struct nvdimm_bus_descriptor *nd_desc;
dev_set_drvdata(dev, acpi_desc);
acpi_desc->dev = dev;
@@ -1917,14 +2278,10 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev)
nd_desc = &acpi_desc->nd_desc;
nd_desc->provider_name = "ACPI.NFIT";
nd_desc->ndctl = acpi_nfit_ctl;
+ nd_desc->flush_probe = acpi_nfit_flush_probe;
+ nd_desc->clear_to_send = acpi_nfit_clear_to_send;
nd_desc->attr_groups = acpi_nfit_attribute_groups;
- acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc);
- if (!acpi_desc->nvdimm_bus) {
- devm_kfree(dev, acpi_desc);
- return ERR_PTR(-ENXIO);
- }
-
INIT_LIST_HEAD(&acpi_desc->spa_maps);
INIT_LIST_HEAD(&acpi_desc->spas);
INIT_LIST_HEAD(&acpi_desc->dcrs);
@@ -1935,9 +2292,9 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev)
INIT_LIST_HEAD(&acpi_desc->dimms);
mutex_init(&acpi_desc->spa_map_mutex);
mutex_init(&acpi_desc->init_mutex);
-
- return acpi_desc;
+ INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
}
+EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
static int acpi_nfit_add(struct acpi_device *adev)
{
@@ -1956,12 +2313,13 @@ static int acpi_nfit_add(struct acpi_device *adev)
return 0;
}
- acpi_desc = acpi_nfit_desc_init(adev);
- if (IS_ERR(acpi_desc)) {
- dev_err(dev, "%s: error initializing acpi_desc: %ld\n",
- __func__, PTR_ERR(acpi_desc));
- return PTR_ERR(acpi_desc);
- }
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
+ return -ENOMEM;
+ acpi_nfit_desc_init(acpi_desc, &adev->dev);
+ acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
+ if (!acpi_desc->nvdimm_bus)
+ return -ENOMEM;
/*
* Save the acpi header for later and then skip it,
@@ -2000,6 +2358,8 @@ static int acpi_nfit_remove(struct acpi_device *adev)
{
struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev);
+ acpi_desc->cancel = 1;
+ flush_workqueue(nfit_wq);
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
return 0;
}
@@ -2024,12 +2384,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
}
if (!acpi_desc) {
- acpi_desc = acpi_nfit_desc_init(adev);
- if (IS_ERR(acpi_desc)) {
- dev_err(dev, "%s: error initializing acpi_desc: %ld\n",
- __func__, PTR_ERR(acpi_desc));
+ acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL);
+ if (!acpi_desc)
goto out_unlock;
- }
+ acpi_nfit_desc_init(acpi_desc, &adev->dev);
+ acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc);
+ if (!acpi_desc->nvdimm_bus)
+ goto out_unlock;
+ } else {
+ /*
+ * Finish previous registration before considering new
+ * regions.
+ */
+ flush_workqueue(nfit_wq);
}
/* Evaluate _FIT */
@@ -2097,12 +2464,17 @@ static __init int nfit_init(void)
acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]);
acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]);
+ nfit_wq = create_singlethread_workqueue("nfit");
+ if (!nfit_wq)
+ return -ENOMEM;
+
return acpi_bus_register_driver(&acpi_nfit_driver);
}
static __exit void nfit_exit(void)
{
acpi_bus_unregister_driver(&acpi_nfit_driver);
+ destroy_workqueue(nfit_wq);
}
module_init(nfit_init);
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 3d549a383659..c75576b2d50e 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -14,6 +14,7 @@
*/
#ifndef __NFIT_H__
#define __NFIT_H__
+#include <linux/workqueue.h>
#include <linux/libnvdimm.h>
#include <linux/types.h>
#include <linux/uuid.h>
@@ -40,15 +41,32 @@ enum nfit_uuids {
NFIT_UUID_MAX,
};
+enum nfit_fic {
+ NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */
+ NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */
+ NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */
+};
+
enum {
- ND_BLK_READ_FLUSH = 1,
- ND_BLK_DCR_LATCH = 2,
+ NFIT_BLK_READ_FLUSH = 1,
+ NFIT_BLK_DCR_LATCH = 2,
+ NFIT_ARS_STATUS_DONE = 0,
+ NFIT_ARS_STATUS_BUSY = 1 << 16,
+ NFIT_ARS_STATUS_NONE = 2 << 16,
+ NFIT_ARS_STATUS_INTR = 3 << 16,
+ NFIT_ARS_START_BUSY = 6,
+ NFIT_ARS_CAP_NONE = 1,
+ NFIT_ARS_F_OVERFLOW = 1,
+ NFIT_ARS_TIMEOUT = 90,
};
struct nfit_spa {
struct acpi_nfit_system_address *spa;
struct list_head list;
- int is_registered;
+ struct nd_region *nd_region;
+ unsigned int ars_done:1;
+ u32 clear_err_unit;
+ u32 max_ars;
};
struct nfit_dcr {
@@ -110,6 +128,10 @@ struct acpi_nfit_desc {
struct list_head idts;
struct nvdimm_bus *nvdimm_bus;
struct device *dev;
+ struct nd_cmd_ars_status *ars_status;
+ size_t ars_status_size;
+ struct work_struct work;
+ unsigned int cancel:1;
unsigned long dimm_dsm_force_en;
unsigned long bus_dsm_force_en;
int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -182,5 +204,5 @@ static inline struct acpi_nfit_desc *to_acpi_desc(
const u8 *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz);
-extern const struct attribute_group *acpi_nfit_attribute_groups[];
+void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev);
#endif /* __NFIT_H__ */
diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c
index 91a336ea8c4f..e9ff9229d942 100644
--- a/drivers/nvdimm/blk.c
+++ b/drivers/nvdimm/blk.c
@@ -31,8 +31,6 @@ struct nd_blk_device {
u32 internal_lbasize;
};
-static int nd_blk_major;
-
static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev)
{
return blk_dev->nsblk->lbasize - blk_dev->sector_size;
@@ -264,7 +262,6 @@ static int nd_blk_attach_disk(struct nd_namespace_common *ndns,
}
disk->driverfs_dev = &ndns->dev;
- disk->major = nd_blk_major;
disk->first_minor = 0;
disk->fops = &nd_blk_fops;
disk->private_data = blk_dev;
@@ -358,25 +355,12 @@ static struct nd_device_driver nd_blk_driver = {
static int __init nd_blk_init(void)
{
- int rc;
-
- rc = register_blkdev(0, "nd_blk");
- if (rc < 0)
- return rc;
-
- nd_blk_major = rc;
- rc = nd_driver_register(&nd_blk_driver);
-
- if (rc < 0)
- unregister_blkdev(nd_blk_major, "nd_blk");
-
- return rc;
+ return nd_driver_register(&nd_blk_driver);
}
static void __exit nd_blk_exit(void)
{
driver_unregister(&nd_blk_driver.drv);
- unregister_blkdev(nd_blk_major, "nd_blk");
}
MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c
index efb2c1ceef98..c32cbb593600 100644
--- a/drivers/nvdimm/btt.c
+++ b/drivers/nvdimm/btt.c
@@ -31,8 +31,6 @@ enum log_ent_request {
LOG_OLD_ENT
};
-static int btt_major;
-
static int arena_read_bytes(struct arena_info *arena, resource_size_t offset,
void *buf, size_t n)
{
@@ -1246,7 +1244,6 @@ static int btt_blk_init(struct btt *btt)
nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name);
btt->btt_disk->driverfs_dev = &btt->nd_btt->dev;
- btt->btt_disk->major = btt_major;
btt->btt_disk->first_minor = 0;
btt->btt_disk->fops = &btt_fops;
btt->btt_disk->private_data = btt;
@@ -1423,22 +1420,11 @@ EXPORT_SYMBOL(nvdimm_namespace_detach_btt);
static int __init nd_btt_init(void)
{
- int rc;
-
- btt_major = register_blkdev(0, "btt");
- if (btt_major < 0)
- return btt_major;
+ int rc = 0;
debugfs_root = debugfs_create_dir("btt", NULL);
- if (IS_ERR_OR_NULL(debugfs_root)) {
+ if (IS_ERR_OR_NULL(debugfs_root))
rc = -ENXIO;
- goto err_debugfs;
- }
-
- return 0;
-
- err_debugfs:
- unregister_blkdev(btt_major, "btt");
return rc;
}
@@ -1446,7 +1432,6 @@ static int __init nd_btt_init(void)
static void __exit nd_btt_exit(void)
{
debugfs_remove_recursive(debugfs_root);
- unregister_blkdev(btt_major, "btt");
}
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT);
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 5d28e9405f32..33557481d452 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -133,6 +133,78 @@ static int nvdimm_bus_remove(struct device *dev)
return rc;
}
+void nd_device_notify(struct device *dev, enum nvdimm_event event)
+{
+ device_lock(dev);
+ if (dev->driver) {
+ struct nd_device_driver *nd_drv;
+
+ nd_drv = to_nd_device_driver(dev->driver);
+ if (nd_drv->notify)
+ nd_drv->notify(dev, event);
+ }
+ device_unlock(dev);
+}
+EXPORT_SYMBOL(nd_device_notify);
+
+void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
+
+ if (!nvdimm_bus)
+ return;
+
+ /* caller is responsible for holding a reference on the device */
+ nd_device_notify(&nd_region->dev, event);
+}
+EXPORT_SYMBOL_GPL(nvdimm_region_notify);
+
+long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
+ unsigned int len)
+{
+ struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc;
+ struct nd_cmd_clear_error clear_err;
+ struct nd_cmd_ars_cap ars_cap;
+ u32 clear_err_unit, mask;
+ int cmd_rc, rc;
+
+ if (!nvdimm_bus)
+ return -ENXIO;
+
+ nd_desc = nvdimm_bus->nd_desc;
+ if (!nd_desc->ndctl)
+ return -ENXIO;
+
+ memset(&ars_cap, 0, sizeof(ars_cap));
+ ars_cap.address = phys;
+ ars_cap.length = len;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap,
+ sizeof(ars_cap), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ if (cmd_rc < 0)
+ return cmd_rc;
+ clear_err_unit = ars_cap.clear_err_unit;
+ if (!clear_err_unit || !is_power_of_2(clear_err_unit))
+ return -ENXIO;
+
+ mask = clear_err_unit - 1;
+ if ((phys | len) & mask)
+ return -ENXIO;
+ memset(&clear_err, 0, sizeof(clear_err));
+ clear_err.address = phys;
+ clear_err.length = len;
+ rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err,
+ sizeof(clear_err), &cmd_rc);
+ if (rc < 0)
+ return rc;
+ if (cmd_rc < 0)
+ return cmd_rc;
+ return clear_err.cleared;
+}
+EXPORT_SYMBOL_GPL(nvdimm_clear_poison);
+
static struct bus_type nvdimm_bus_type = {
.name = "nd",
.uevent = nvdimm_bus_uevent,
@@ -395,6 +467,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
.out_num = 3,
.out_sizes = { 4, 4, UINT_MAX, },
},
+ [ND_CMD_CLEAR_ERROR] = {
+ .in_num = 2,
+ .in_sizes = { 8, 8, },
+ .out_num = 3,
+ .out_sizes = { 4, 4, 8, },
+ },
};
const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
@@ -463,17 +541,37 @@ void wait_nvdimm_bus_probe_idle(struct device *dev)
} while (true);
}
+static int pmem_active(struct device *dev, void *data)
+{
+ if (is_nd_pmem(dev) && dev->driver)
+ return -EBUSY;
+ return 0;
+}
+
/* set_config requires an idle interleave set */
-static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd)
+static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus,
+ struct nvdimm *nvdimm, unsigned int cmd)
{
- struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+
+ /* ask the bus provider if it would like to block this request */
+ if (nd_desc->clear_to_send) {
+ int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd);
+
+ if (rc)
+ return rc;
+ }
+
+ /* require clear error to go through the pmem driver */
+ if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR)
+ return device_for_each_child(&nvdimm_bus->dev, NULL,
+ pmem_active);
if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA)
return 0;
- nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
+ /* prevent label manipulation while the kernel owns label updates */
wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
-
if (atomic_read(&nvdimm->busy))
return -EBUSY;
return 0;
@@ -513,10 +611,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
/* fail write commands (when read-only) */
if (read_only)
- switch (ioctl_cmd) {
- case ND_IOCTL_VENDOR:
- case ND_IOCTL_SET_CONFIG_DATA:
- case ND_IOCTL_ARS_START:
+ switch (cmd) {
+ case ND_CMD_VENDOR:
+ case ND_CMD_SET_CONFIG_DATA:
+ case ND_CMD_ARS_START:
+ case ND_CMD_CLEAR_ERROR:
dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
nvdimm ? nvdimm_cmd_name(cmd)
: nvdimm_bus_cmd_name(cmd));
@@ -583,11 +682,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
}
nvdimm_bus_lock(&nvdimm_bus->dev);
- rc = nd_cmd_clear_to_send(nvdimm, cmd);
+ rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd);
if (rc)
goto out_unlock;
- rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len);
+ rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL);
if (rc < 0)
goto out_unlock;
if (copy_to_user(p, buf, buf_len))
@@ -602,14 +701,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long id = (long) file->private_data;
- int rc = -ENXIO, read_only;
+ int rc = -ENXIO, ro;
struct nvdimm_bus *nvdimm_bus;
- read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+ ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
if (nvdimm_bus->id == id) {
- rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg);
+ rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
break;
}
}
@@ -633,10 +732,10 @@ static int match_dimm(struct device *dev, void *data)
static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
- int rc = -ENXIO, read_only;
+ int rc = -ENXIO, ro;
struct nvdimm_bus *nvdimm_bus;
- read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+ ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
struct device *dev = device_find_child(&nvdimm_bus->dev,
@@ -647,7 +746,7 @@ static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
continue;
nvdimm = to_nvdimm(dev);
- rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg);
+ rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
put_device(dev);
break;
}
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 2e2832b83c93..79646d0c3277 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data)
static ssize_t wait_probe_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
+ struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+ struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+ int rc;
+
+ if (nd_desc->flush_probe) {
+ rc = nd_desc->flush_probe(nd_desc);
+ if (rc)
+ return rc;
+ }
nd_synchronize();
device_for_each_child(dev, NULL, flush_regions_dimms);
return sprintf(buf, "1\n");
@@ -408,33 +417,11 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
set_badblock(bb, start_sector, num_sectors);
}
-/**
- * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
- * @ndns: the namespace containing poison ranges
- * @bb: badblocks instance to populate
- * @offset: offset at the start of the namespace before 'sector 0'
- *
- * The poison list generated during NFIT initialization may contain multiple,
- * possibly overlapping ranges in the SPA (System Physical Address) space.
- * Compare each of these ranges to the namespace currently being initialized,
- * and add badblocks to the gendisk for all matching sub-ranges
- */
-void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
- struct badblocks *bb, resource_size_t offset)
+static void namespace_add_poison(struct list_head *poison_list,
+ struct badblocks *bb, struct resource *res)
{
- struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
- struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
- struct nvdimm_bus *nvdimm_bus;
- struct list_head *poison_list;
- u64 ns_start, ns_end, ns_size;
struct nd_poison *pl;
- ns_size = nvdimm_namespace_capacity(ndns) - offset;
- ns_start = nsio->res.start + offset;
- ns_end = nsio->res.end;
-
- nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
- poison_list = &nvdimm_bus->poison_list;
if (list_empty(poison_list))
return;
@@ -442,37 +429,69 @@ void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
u64 pl_end = pl->start + pl->length - 1;
/* Discard intervals with no intersection */
- if (pl_end < ns_start)
+ if (pl_end < res->start)
continue;
- if (pl->start > ns_end)
+ if (pl->start > res->end)
continue;
/* Deal with any overlap after start of the namespace */
- if (pl->start >= ns_start) {
+ if (pl->start >= res->start) {
u64 start = pl->start;
u64 len;
- if (pl_end <= ns_end)
+ if (pl_end <= res->end)
len = pl->length;
else
- len = ns_start + ns_size - pl->start;
- __add_badblock_range(bb, start - ns_start, len);
+ len = res->start + resource_size(res)
+ - pl->start;
+ __add_badblock_range(bb, start - res->start, len);
continue;
}
/* Deal with overlap for poison starting before the namespace */
- if (pl->start < ns_start) {
+ if (pl->start < res->start) {
u64 len;
- if (pl_end < ns_end)
- len = pl->start + pl->length - ns_start;
+ if (pl_end < res->end)
+ len = pl->start + pl->length - res->start;
else
- len = ns_size;
+ len = resource_size(res);
__add_badblock_range(bb, 0, len);
}
}
}
+
+/**
+ * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks
+ * @ndns: the namespace containing poison ranges
+ * @bb: badblocks instance to populate
+ * @offset: offset at the start of the namespace before 'sector 0'
+ *
+ * The poison list generated during NFIT initialization may contain multiple,
+ * possibly overlapping ranges in the SPA (System Physical Address) space.
+ * Compare each of these ranges to the namespace currently being initialized,
+ * and add badblocks to the gendisk for all matching sub-ranges
+ */
+void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns,
+ struct badblocks *bb, resource_size_t offset)
+{
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
+ struct nvdimm_bus *nvdimm_bus;
+ struct list_head *poison_list;
+ struct resource res = {
+ .start = nsio->res.start + offset,
+ .end = nsio->res.end,
+ };
+
+ nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent);
+ poison_list = &nvdimm_bus->poison_list;
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ namespace_add_poison(poison_list, bb, &res);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+}
EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison);
-static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
struct nd_poison *pl;
@@ -487,12 +506,12 @@ static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
return 0;
}
-int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
{
struct nd_poison *pl;
if (list_empty(&nvdimm_bus->poison_list))
- return __add_poison(nvdimm_bus, addr, length);
+ return add_poison(nvdimm_bus, addr, length);
/*
* There is a chance this is a duplicate, check for those first.
@@ -512,7 +531,18 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
* as any overlapping ranges will get resolved when the list is consumed
* and converted to badblocks
*/
- return __add_poison(nvdimm_bus, addr, length);
+ return add_poison(nvdimm_bus, addr, length);
+}
+
+int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length)
+{
+ int rc;
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
+ rc = bus_add_poison(nvdimm_bus, addr, length);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+
+ return rc;
}
EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison);
@@ -553,7 +583,11 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
+
+ nvdimm_bus_lock(&nvdimm_bus->dev);
free_poison_list(&nvdimm_bus->poison_list);
+ nvdimm_bus_unlock(&nvdimm_bus->dev);
+
nvdimm_bus_destroy_ndctl(nvdimm_bus);
device_unregister(&nvdimm_bus->dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 651b8d19d324..c56f88217924 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
memset(cmd, 0, sizeof(*cmd));
nd_desc = nvdimm_bus->nd_desc;
return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
- ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd));
+ ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL);
}
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
@@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
cmd->in_offset = offset;
rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
ND_CMD_GET_CONFIG_DATA, cmd,
- cmd->in_length + sizeof(*cmd));
+ cmd->in_length + sizeof(*cmd), NULL);
if (rc || cmd->status) {
rc = -ENXIO;
break;
@@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
status = ((void *) cmd) + cmd_size - sizeof(u32);
rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
- ND_CMD_SET_CONFIG_DATA, cmd, cmd_size);
+ ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL);
if (rc || *status) {
rc = rc ? rc : -ENXIO;
break;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 9edf7eb7d17c..f5cb88601359 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -133,6 +133,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
bool pmem_should_map_pages(struct device *dev)
{
struct nd_region *nd_region = to_nd_region(dev->parent);
+ struct nd_namespace_io *nsio;
if (!IS_ENABLED(CONFIG_ZONE_DEVICE))
return false;
@@ -143,6 +144,12 @@ bool pmem_should_map_pages(struct device *dev)
if (is_nd_pfn(dev) || is_nd_btt(dev))
return false;
+ nsio = to_nd_namespace_io(dev);
+ if (region_intersects(nsio->res.start, resource_size(&nsio->res),
+ IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE) == REGION_MIXED)
+ return false;
+
#ifdef ARCH_MEMREMAP_PMEM
return ARCH_MEMREMAP_PMEM == MEMREMAP_WB;
#else
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ba1633b9da31..1799bd97a9ce 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -18,6 +18,7 @@
#include <linux/mutex.h>
#include <linux/ndctl.h>
#include <linux/types.h>
+#include <linux/nd.h>
#include "label.h"
enum {
@@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size);
void wait_nvdimm_bus_probe_idle(struct device *dev);
void nd_device_register(struct device *dev);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
+void nd_device_notify(struct device *dev, enum nvdimm_event event);
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len);
ssize_t nd_sector_size_show(unsigned long current_lbasize,
@@ -184,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
+long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
+ unsigned int len);
struct nd_btt *to_nd_btt(struct device *dev);
struct nd_gen_sb {
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index cc243754acef..8e343a3ca873 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -15,6 +15,7 @@
#define __NVDIMM_PFN_H
#include <linux/types.h>
+#include <linux/mmzone.h>
#define PFN_SIG_LEN 16
#define PFN_SIG "NVDIMM_PFN_INFO\0"
@@ -26,10 +27,28 @@ struct nd_pfn_sb {
__le32 flags;
__le16 version_major;
__le16 version_minor;
- __le64 dataoff;
+ __le64 dataoff; /* relative to namespace_base + start_pad */
__le64 npfns;
__le32 mode;
- u8 padding[4012];
+ /* minor-version-1 additions for section alignment */
+ __le32 start_pad;
+ __le32 end_trunc;
+ u8 padding[4004];
__le64 checksum;
};
+
+#ifdef CONFIG_SPARSEMEM
+#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x)
+#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x)
+#else
+/*
+ * In this case ZONE_DEVICE=n and we will disable 'pfn' device support,
+ * but we still want pmem to compile.
+ */
+#define PFN_SECTION_ALIGN_DOWN(x) (x)
+#define PFN_SECTION_ALIGN_UP(x) (x)
+#endif
+
+#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x)))
+#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x)))
#endif /* __NVDIMM_PFN_H */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index ae81a2f1da50..254d3bc13f70 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -205,11 +205,67 @@ static ssize_t namespace_store(struct device *dev,
}
static DEVICE_ATTR_RW(namespace);
+static ssize_t resource_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+ ssize_t rc;
+
+ device_lock(dev);
+ if (dev->driver) {
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ u64 offset = __le64_to_cpu(pfn_sb->dataoff);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+
+ rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start
+ + start_pad + offset);
+ } else {
+ /* no address to convey if the pfn instance is disabled */
+ rc = -ENXIO;
+ }
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(resource);
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_pfn *nd_pfn = to_nd_pfn(dev);
+ ssize_t rc;
+
+ device_lock(dev);
+ if (dev->driver) {
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ u64 offset = __le64_to_cpu(pfn_sb->dataoff);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
+ u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+
+ rc = sprintf(buf, "%llu\n", (unsigned long long)
+ resource_size(&nsio->res) - start_pad
+ - end_trunc - offset);
+ } else {
+ /* no size to convey if the pfn instance is disabled */
+ rc = -ENXIO;
+ }
+ device_unlock(dev);
+
+ return rc;
+}
+static DEVICE_ATTR_RO(size);
+
static struct attribute *nd_pfn_attributes[] = {
&dev_attr_mode.attr,
&dev_attr_namespace.attr,
&dev_attr_uuid.attr,
&dev_attr_align.attr,
+ &dev_attr_resource.attr,
+ &dev_attr_size.attr,
NULL,
};
@@ -299,6 +355,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn)
if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0)
return -ENODEV;
+ if (__le16_to_cpu(pfn_sb->version_minor) < 1) {
+ pfn_sb->start_pad = 0;
+ pfn_sb->end_trunc = 0;
+ }
+
switch (le32_to_cpu(pfn_sb->mode)) {
case PFN_MODE_RAM:
case PFN_MODE_PMEM:
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 8d0b54670184..ca5721c306bb 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -43,12 +43,13 @@ struct pmem_device {
phys_addr_t data_offset;
u64 pfn_flags;
void __pmem *virt_addr;
+ /* immutable base size of the namespace */
size_t size;
+ /* trim size when namespace capacity has been section aligned */
+ u32 pfn_pad;
struct badblocks bb;
};
-static int pmem_major;
-
static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
{
if (bb->count) {
@@ -62,26 +63,56 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
return false;
}
+static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
+ unsigned int len)
+{
+ struct device *dev = disk_to_dev(pmem->pmem_disk);
+ sector_t sector;
+ long cleared;
+
+ sector = (offset - pmem->data_offset) / 512;
+ cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);
+
+ if (cleared > 0 && cleared / 512) {
+ dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
+ __func__, (unsigned long long) sector,
+ cleared / 512, cleared / 512 > 1 ? "s" : "");
+ badblocks_clear(&pmem->bb, sector, cleared / 512);
+ }
+ invalidate_pmem(pmem->virt_addr + offset, len);
+}
+
static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
unsigned int len, unsigned int off, int rw,
sector_t sector)
{
+ int rc = 0;
+ bool bad_pmem = false;
void *mem = kmap_atomic(page);
phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
void __pmem *pmem_addr = pmem->virt_addr + pmem_off;
+ if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+ bad_pmem = true;
+
if (rw == READ) {
- if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
- return -EIO;
- memcpy_from_pmem(mem + off, pmem_addr, len);
- flush_dcache_page(page);
+ if (unlikely(bad_pmem))
+ rc = -EIO;
+ else {
+ memcpy_from_pmem(mem + off, pmem_addr, len);
+ flush_dcache_page(page);
+ }
} else {
flush_dcache_page(page);
memcpy_to_pmem(pmem_addr, mem + off, len);
+ if (unlikely(bad_pmem)) {
+ pmem_clear_poison(pmem, pmem_off, len);
+ memcpy_to_pmem(pmem_addr, mem + off, len);
+ }
}
kunmap_atomic(mem);
- return 0;
+ return rc;
}
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
@@ -145,7 +176,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector,
*kaddr = pmem->virt_addr + offset;
*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
- return pmem->size - offset;
+ return pmem->size - pmem->pfn_pad - offset;
}
static const struct block_device_operations pmem_fops = {
@@ -228,15 +259,14 @@ static int pmem_attach_disk(struct device *dev,
return -ENOMEM;
}
- disk->major = pmem_major;
- disk->first_minor = 0;
disk->fops = &pmem_fops;
disk->private_data = pmem;
disk->queue = pmem->pmem_queue;
disk->flags = GENHD_FL_EXT_DEVT;
nvdimm_namespace_disk_name(ndns, disk->disk_name);
disk->driverfs_dev = dev;
- set_capacity(disk, (pmem->size - pmem->data_offset) / 512);
+ set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
+ / 512);
pmem->pmem_disk = disk;
devm_exit_badblocks(dev, &pmem->bb);
if (devm_init_badblocks(dev, &pmem->bb))
@@ -279,6 +309,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
struct nd_namespace_common *ndns = nd_pfn->ndns;
+ u32 start_pad = 0, end_trunc = 0;
+ resource_size_t start, size;
+ struct nd_namespace_io *nsio;
struct nd_region *nd_region;
unsigned long npfns;
phys_addr_t offset;
@@ -304,21 +337,56 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
}
memset(pfn_sb, 0, sizeof(*pfn_sb));
- npfns = (pmem->size - SZ_8K) / SZ_4K;
+
+ /*
+ * Check if pmem collides with 'System RAM' when section aligned and
+ * trim it accordingly
+ */
+ nsio = to_nd_namespace_io(&ndns->dev);
+ start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
+ size = resource_size(&nsio->res);
+ if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE) == REGION_MIXED) {
+
+ start = nsio->res.start;
+ start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
+ }
+
+ start = nsio->res.start;
+ size = PHYS_SECTION_ALIGN_UP(start + size) - start;
+ if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE) == REGION_MIXED) {
+ size = resource_size(&nsio->res);
+ end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+ }
+
+ if (start_pad + end_trunc)
+ dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+ dev_name(&ndns->dev), start_pad + end_trunc);
+
/*
* Note, we use 64 here for the standard size of struct page,
* debugging options may cause it to be larger in which case the
* implementation will limit the pfns advertised through
* ->direct_access() to those that are included in the memmap.
*/
+ start += start_pad;
+ npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
if (nd_pfn->mode == PFN_MODE_PMEM)
- offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align);
+ offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align)
+ - start;
else if (nd_pfn->mode == PFN_MODE_RAM)
- offset = ALIGN(SZ_8K, nd_pfn->align);
+ offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
else
goto err;
- npfns = (pmem->size - offset) / SZ_4K;
+ if (offset + start_pad + end_trunc >= pmem->size) {
+ dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
+ dev_name(&ndns->dev));
+ goto err;
+ }
+
+ npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
pfn_sb->dataoff = cpu_to_le64(offset);
pfn_sb->npfns = cpu_to_le64(npfns);
@@ -326,6 +394,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
pfn_sb->version_major = cpu_to_le16(1);
+ pfn_sb->version_minor = cpu_to_le16(1);
+ pfn_sb->start_pad = cpu_to_le32(start_pad);
+ pfn_sb->end_trunc = cpu_to_le32(end_trunc);
checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
pfn_sb->checksum = cpu_to_le64(checksum);
@@ -356,41 +427,56 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
return 0;
}
-static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
+/*
+ * We hotplug memory at section granularity, pad the reserved area from
+ * the previous section base to the namespace base address.
+ */
+static unsigned long init_altmap_base(resource_size_t base)
+{
+ unsigned long base_pfn = PHYS_PFN(base);
+
+ return PFN_SECTION_ALIGN_DOWN(base_pfn);
+}
+
+static unsigned long init_altmap_reserve(resource_size_t base)
+{
+ unsigned long reserve = PHYS_PFN(SZ_8K);
+ unsigned long base_pfn = PHYS_PFN(base);
+
+ reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
+ return reserve;
+}
+
+static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
{
- struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
- struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
- struct device *dev = &nd_pfn->dev;
- struct nd_region *nd_region;
- struct vmem_altmap *altmap;
- struct nd_pfn_sb *pfn_sb;
- struct pmem_device *pmem;
- struct request_queue *q;
- phys_addr_t offset;
int rc;
+ struct resource res;
+ struct request_queue *q;
+ struct pmem_device *pmem;
+ struct vmem_altmap *altmap;
+ struct device *dev = &nd_pfn->dev;
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
+ u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ resource_size_t base = nsio->res.start + start_pad;
struct vmem_altmap __altmap = {
- .base_pfn = __phys_to_pfn(nsio->res.start),
- .reserve = __phys_to_pfn(SZ_8K),
+ .base_pfn = init_altmap_base(base),
+ .reserve = init_altmap_reserve(base),
};
- if (!nd_pfn->uuid || !nd_pfn->ndns)
- return -ENODEV;
-
- nd_region = to_nd_region(dev->parent);
- rc = nd_pfn_init(nd_pfn);
- if (rc)
- return rc;
-
- pfn_sb = nd_pfn->pfn_sb;
- offset = le64_to_cpu(pfn_sb->dataoff);
+ pmem = dev_get_drvdata(dev);
+ pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
+ pmem->pfn_pad = start_pad + end_trunc;
nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
if (nd_pfn->mode == PFN_MODE_RAM) {
- if (offset < SZ_8K)
+ if (pmem->data_offset < SZ_8K)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
altmap = NULL;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
- nd_pfn->npfns = (resource_size(&nsio->res) - offset)
+ nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
/ PAGE_SIZE;
if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
dev_info(&nd_pfn->dev,
@@ -398,7 +484,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
le64_to_cpu(nd_pfn->pfn_sb->npfns),
nd_pfn->npfns);
altmap = & __altmap;
- altmap->free = __phys_to_pfn(offset - SZ_8K);
+ altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
altmap->alloc = 0;
} else {
rc = -ENXIO;
@@ -406,10 +492,12 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
}
/* establish pfn range for lookup, and switch to direct map */
- pmem = dev_get_drvdata(dev);
q = pmem->pmem_queue;
+ memcpy(&res, &nsio->res, sizeof(res));
+ res.start += start_pad;
+ res.end -= end_trunc;
devm_memunmap(dev, (void __force *) pmem->virt_addr);
- pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res,
+ pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
&q->q_usage_counter, altmap);
pmem->pfn_flags |= PFN_MAP;
if (IS_ERR(pmem->virt_addr)) {
@@ -418,7 +506,6 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
}
/* attach pmem disk in "pfn-mode" */
- pmem->data_offset = offset;
rc = pmem_attach_disk(dev, ndns, pmem);
if (rc)
goto err;
@@ -427,6 +514,22 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
err:
nvdimm_namespace_detach_pfn(ndns);
return rc;
+
+}
+
+static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
+{
+ struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
+ int rc;
+
+ if (!nd_pfn->uuid || !nd_pfn->ndns)
+ return -ENODEV;
+
+ rc = nd_pfn_init(nd_pfn);
+ if (rc)
+ return rc;
+ /* we need a valid pfn_sb before we can init a vmem_altmap */
+ return __nvdimm_namespace_attach_pfn(nd_pfn);
}
static int nd_pmem_probe(struct device *dev)
@@ -488,12 +591,27 @@ static int nd_pmem_remove(struct device *dev)
return 0;
}
+static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
+{
+ struct pmem_device *pmem = dev_get_drvdata(dev);
+ struct nd_namespace_common *ndns = pmem->ndns;
+
+ if (event != NVDIMM_REVALIDATE_POISON)
+ return;
+
+ if (is_nd_btt(dev))
+ nvdimm_namespace_add_poison(ndns, &pmem->bb, 0);
+ else
+ nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset);
+}
+
MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
.probe = nd_pmem_probe,
.remove = nd_pmem_remove,
+ .notify = nd_pmem_notify,
.drv = {
.name = "nd_pmem",
},
@@ -502,26 +620,13 @@ static struct nd_device_driver nd_pmem_driver = {
static int __init pmem_init(void)
{
- int error;
-
- pmem_major = register_blkdev(0, "pmem");
- if (pmem_major < 0)
- return pmem_major;
-
- error = nd_driver_register(&nd_pmem_driver);
- if (error) {
- unregister_blkdev(pmem_major, "pmem");
- return error;
- }
-
- return 0;
+ return nd_driver_register(&nd_pmem_driver);
}
module_init(pmem_init);
static void pmem_exit(void)
{
driver_unregister(&nd_pmem_driver.drv);
- unregister_blkdev(pmem_major, "pmem");
}
module_exit(pmem_exit);
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 7da63eac78ee..4b7715e29cff 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev)
return 0;
}
+static int child_notify(struct device *dev, void *data)
+{
+ nd_device_notify(dev, *(enum nvdimm_event *) data);
+ return 0;
+}
+
+static void nd_region_notify(struct device *dev, enum nvdimm_event event)
+{
+ device_for_each_child(dev, &event, child_notify);
+}
+
static struct nd_device_driver nd_region_driver = {
.probe = nd_region_probe,
.remove = nd_region_remove,
+ .notify = nd_region_notify,
.drv = {
.name = "nd_region",
},