diff options
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/nfit.c | 798 | ||||
-rw-r--r-- | drivers/acpi/nfit.h | 30 | ||||
-rw-r--r-- | drivers/nvdimm/blk.c | 18 | ||||
-rw-r--r-- | drivers/nvdimm/btt.c | 19 | ||||
-rw-r--r-- | drivers/nvdimm/bus.c | 131 | ||||
-rw-r--r-- | drivers/nvdimm/core.c | 110 | ||||
-rw-r--r-- | drivers/nvdimm/dimm_devs.c | 6 | ||||
-rw-r--r-- | drivers/nvdimm/namespace_devs.c | 7 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 4 | ||||
-rw-r--r-- | drivers/nvdimm/pfn.h | 23 | ||||
-rw-r--r-- | drivers/nvdimm/pfn_devs.c | 61 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 219 | ||||
-rw-r--r-- | drivers/nvdimm/region.c | 12 |
13 files changed, 1071 insertions, 367 deletions
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 35947ac87644..d0f35e63640b 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -21,6 +21,7 @@ #include <linux/sort.h> #include <linux/pmem.h> #include <linux/io.h> +#include <linux/nd.h> #include <asm/cacheflush.h> #include "nfit.h" @@ -34,6 +35,18 @@ static bool force_enable_dimms; module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR); MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status"); +static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT; +module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds"); + +/* after three payloads of overflow, it's dead jim */ +static unsigned int scrub_overflow_abort = 3; +module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR); +MODULE_PARM_DESC(scrub_overflow_abort, + "Number of times we overflow ARS results before abort"); + +static struct workqueue_struct *nfit_wq; + struct nfit_table_prev { struct list_head spas; struct list_head memdevs; @@ -72,9 +85,90 @@ static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc) return to_acpi_device(acpi_desc->dev); } +static int xlat_status(void *buf, unsigned int cmd) +{ + struct nd_cmd_clear_error *clear_err; + struct nd_cmd_ars_status *ars_status; + struct nd_cmd_ars_start *ars_start; + struct nd_cmd_ars_cap *ars_cap; + u16 flags; + + switch (cmd) { + case ND_CMD_ARS_CAP: + ars_cap = buf; + if ((ars_cap->status & 0xffff) == NFIT_ARS_CAP_NONE) + return -ENOTTY; + + /* Command failed */ + if (ars_cap->status & 0xffff) + return -EIO; + + /* No supported scan types for this range */ + flags = ND_ARS_PERSISTENT | ND_ARS_VOLATILE; + if ((ars_cap->status >> 16 & flags) == 0) + return -ENOTTY; + break; + case ND_CMD_ARS_START: + ars_start = buf; + /* ARS is in progress */ + if ((ars_start->status & 0xffff) == NFIT_ARS_START_BUSY) + return -EBUSY; + + /* Command failed */ + if (ars_start->status & 0xffff) + return -EIO; + break; + case ND_CMD_ARS_STATUS: + ars_status = buf; + /* Command failed */ + if (ars_status->status & 0xffff) + return -EIO; + /* Check extended status (Upper two bytes) */ + if (ars_status->status == NFIT_ARS_STATUS_DONE) + return 0; + + /* ARS is in progress */ + if (ars_status->status == NFIT_ARS_STATUS_BUSY) + return -EBUSY; + + /* No ARS performed for the current boot */ + if (ars_status->status == NFIT_ARS_STATUS_NONE) + return -EAGAIN; + + /* + * ARS interrupted, either we overflowed or some other + * agent wants the scan to stop. If we didn't overflow + * then just continue with the returned results. + */ + if (ars_status->status == NFIT_ARS_STATUS_INTR) { + if (ars_status->flags & NFIT_ARS_F_OVERFLOW) + return -ENOSPC; + return 0; + } + + /* Unknown status */ + if (ars_status->status >> 16) + return -EIO; + break; + case ND_CMD_CLEAR_ERROR: + clear_err = buf; + if (clear_err->status & 0xffff) + return -EIO; + if (!clear_err->cleared) + return -EIO; + if (clear_err->length > clear_err->cleared) + return clear_err->cleared; + break; + default: + break; + } + + return 0; +} + static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm, unsigned int cmd, void *buf, - unsigned int buf_len) + unsigned int buf_len, int *cmd_rc) { struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); const struct nd_cmd_desc *desc = NULL; @@ -185,6 +279,8 @@ static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, * unfilled in the output buffer */ rc = buf_len - offset - in_buf.buffer.length; + if (cmd_rc) + *cmd_rc = xlat_status(buf, cmd); } else { dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n", __func__, dimm_name, cmd_name, buf_len, @@ -675,12 +771,11 @@ static struct attribute_group acpi_nfit_attribute_group = { .attrs = acpi_nfit_attributes, }; -const struct attribute_group *acpi_nfit_attribute_groups[] = { +static const struct attribute_group *acpi_nfit_attribute_groups[] = { &nvdimm_bus_attribute_group, &acpi_nfit_attribute_group, NULL, }; -EXPORT_SYMBOL_GPL(acpi_nfit_attribute_groups); static struct acpi_nfit_memory_map *to_nfit_memdev(struct device *dev) { @@ -917,7 +1012,7 @@ static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc) if (!adev) return; - for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++) + for (i = ND_CMD_ARS_CAP; i <= ND_CMD_CLEAR_ERROR; i++) if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i)) set_bit(i, &nd_desc->dsm_mask); } @@ -1105,7 +1200,7 @@ static void write_blk_ctl(struct nfit_blk *nfit_blk, unsigned int bw, writeq(cmd, mmio->addr.base + offset); wmb_blk(nfit_blk); - if (nfit_blk->dimm_flags & ND_BLK_DCR_LATCH) + if (nfit_blk->dimm_flags & NFIT_BLK_DCR_LATCH) readq(mmio->addr.base + offset); } @@ -1141,7 +1236,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk, memcpy_to_pmem(mmio->addr.aperture + offset, iobuf + copied, c); else { - if (nfit_blk->dimm_flags & ND_BLK_READ_FLUSH) + if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH) mmio_flush_range((void __force *) mmio->addr.aperture + offset, c); @@ -1328,13 +1423,13 @@ static int acpi_nfit_blk_get_flags(struct nvdimm_bus_descriptor *nd_desc, memset(&flags, 0, sizeof(flags)); rc = nd_desc->ndctl(nd_desc, nvdimm, ND_CMD_DIMM_FLAGS, &flags, - sizeof(flags)); + sizeof(flags), NULL); if (rc >= 0 && flags.status == 0) nfit_blk->dimm_flags = flags.flags; else if (rc == -ENOTTY) { /* fall back to a conservative default */ - nfit_blk->dimm_flags = ND_BLK_DCR_LATCH | ND_BLK_READ_FLUSH; + nfit_blk->dimm_flags = NFIT_BLK_DCR_LATCH | NFIT_BLK_READ_FLUSH; rc = 0; } else rc = -ENXIO; @@ -1473,93 +1568,85 @@ static void acpi_nfit_blk_region_disable(struct nvdimm_bus *nvdimm_bus, /* devm will free nfit_blk */ } -static int ars_get_cap(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_cap *cmd, u64 addr, u64 length) +static int ars_get_cap(struct acpi_nfit_desc *acpi_desc, + struct nd_cmd_ars_cap *cmd, struct nfit_spa *nfit_spa) { - cmd->address = addr; - cmd->length = length; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int cmd_rc, rc; - return nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, - sizeof(*cmd)); + cmd->address = spa->address; + cmd->length = spa->length; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, cmd, + sizeof(*cmd), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_do_start(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_start *cmd, u64 addr, u64 length) +static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa) { int rc; + int cmd_rc; + struct nd_cmd_ars_start ars_start; + struct acpi_nfit_system_address *spa = nfit_spa->spa; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - cmd->address = addr; - cmd->length = length; - cmd->type = ND_ARS_PERSISTENT; + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = spa->address; + ars_start.length = spa->length; + if (nfit_spa_type(spa) == NFIT_SPA_PM) + ars_start.type = ND_ARS_PERSISTENT; + else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) + ars_start.type = ND_ARS_VOLATILE; + else + return -ENOTTY; - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, cmd, - sizeof(*cmd)); - if (rc) - return rc; - switch (cmd->status) { - case 0: - return 0; - case 1: - /* ARS unsupported, but we should never get here */ - return 0; - case 6: - /* ARS is in progress */ - msleep(1000); - break; - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + + if (rc < 0) + return rc; + return cmd_rc; } -static int ars_get_status(struct nvdimm_bus_descriptor *nd_desc, - struct nd_cmd_ars_status *cmd, u32 size) +static int ars_continue(struct acpi_nfit_desc *acpi_desc) { - int rc; + int rc, cmd_rc; + struct nd_cmd_ars_start ars_start; + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + + memset(&ars_start, 0, sizeof(ars_start)); + ars_start.address = ars_status->restart_address; + ars_start.length = ars_status->restart_length; + ars_start.type = ars_status->type; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_START, &ars_start, + sizeof(ars_start), &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; +} - while (1) { - rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, cmd, - size); - if (rc || cmd->status & 0xffff) - return -ENXIO; +static int ars_get_status(struct acpi_nfit_desc *acpi_desc) +{ + struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; + struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status; + int rc, cmd_rc; - /* Check extended status (Upper two bytes) */ - switch (cmd->status >> 16) { - case 0: - return 0; - case 1: - /* ARS is in progress */ - msleep(1000); - break; - case 2: - /* No ARS performed for the current boot */ - return 0; - case 3: - /* TODO: error list overflow support */ - default: - return -ENXIO; - } - } + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status, + acpi_desc->ars_status_size, &cmd_rc); + if (rc < 0) + return rc; + return cmd_rc; } static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, - struct nd_cmd_ars_status *ars_status, u64 start) + struct nd_cmd_ars_status *ars_status) { int rc; u32 i; - /* - * The address field returned by ars_status should be either - * less than or equal to the address we last started ARS for. - * The (start, length) returned by ars_status should also have - * non-zero overlap with the range we started ARS for. - * If this is not the case, bail. - */ - if (ars_status->address > start || - (ars_status->address + ars_status->length < start)) - return -ENXIO; - for (i = 0; i < ars_status->num_records; i++) { rc = nvdimm_bus_add_poison(nvdimm_bus, ars_status->records[i].err_address, @@ -1571,118 +1658,56 @@ static int ars_status_process_records(struct nvdimm_bus *nvdimm_bus, return 0; } -static int acpi_nfit_find_poison(struct acpi_nfit_desc *acpi_desc, - struct nd_region_desc *ndr_desc) +static void acpi_nfit_remove_resource(void *data) { - struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc; - struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus; - struct nd_cmd_ars_status *ars_status = NULL; - struct nd_cmd_ars_start *ars_start = NULL; - struct nd_cmd_ars_cap *ars_cap = NULL; - u64 start, len, cur, remaining; - u32 ars_status_size; - int rc; - - ars_cap = kzalloc(sizeof(*ars_cap), GFP_KERNEL); - if (!ars_cap) - return -ENOMEM; + struct resource *res = data; - start = ndr_desc->res->start; - len = ndr_desc->res->end - ndr_desc->res->start + 1; - - /* - * If ARS is unimplemented, unsupported, or if the 'Persistent Memory - * Scrub' flag in extended status is not set, skip this but continue - * initialization - */ - rc = ars_get_cap(nd_desc, ars_cap, start, len); - if (rc == -ENOTTY) { - dev_dbg(acpi_desc->dev, - "Address Range Scrub is not implemented, won't create an error list\n"); - rc = 0; - goto out; - } - if (rc) - goto out; - - if ((ars_cap->status & 0xffff) || - !(ars_cap->status >> 16 & ND_ARS_PERSISTENT)) { - dev_warn(acpi_desc->dev, - "ARS unsupported (status: 0x%x), won't create an error list\n", - ars_cap->status); - goto out; - } - - /* - * Check if a full-range ARS has been run. If so, use those results - * without having to start a new ARS. - */ - ars_status_size = ars_cap->max_ars_out; - ars_status = kzalloc(ars_status_size, GFP_KERNEL); - if (!ars_status) { - rc = -ENOMEM; - goto out; - } + remove_resource(res); +} - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; +static int acpi_nfit_insert_resource(struct acpi_nfit_desc *acpi_desc, + struct nd_region_desc *ndr_desc) +{ + struct resource *res, *nd_res = ndr_desc->res; + int is_pmem, ret; - if (ars_status->address <= start && - (ars_status->address + ars_status->length >= start + len)) { - rc = ars_status_process_records(nvdimm_bus, ars_status, start); - goto out; - } + /* No operation if the region is already registered as PMEM */ + is_pmem = region_intersects(nd_res->start, resource_size(nd_res), + IORESOURCE_MEM, IORES_DESC_PERSISTENT_MEMORY); + if (is_pmem == REGION_INTERSECTS) + return 0; - /* - * ARS_STATUS can overflow if the number of poison entries found is - * greater than the maximum buffer size (ars_cap->max_ars_out) - * To detect overflow, check if the length field of ars_status - * is less than the length we supplied. If so, process the - * error entries we got, adjust the start point, and start again - */ - ars_start = kzalloc(sizeof(*ars_start), GFP_KERNEL); - if (!ars_start) + res = devm_kzalloc(acpi_desc->dev, sizeof(*res), GFP_KERNEL); + if (!res) return -ENOMEM; - cur = start; - remaining = len; - do { - u64 done, end; - - rc = ars_do_start(nd_desc, ars_start, cur, remaining); - if (rc) - goto out; - - rc = ars_get_status(nd_desc, ars_status, ars_status_size); - if (rc) - goto out; + res->name = "Persistent Memory"; + res->start = nd_res->start; + res->end = nd_res->end; + res->flags = IORESOURCE_MEM; + res->desc = IORES_DESC_PERSISTENT_MEMORY; - rc = ars_status_process_records(nvdimm_bus, ars_status, cur); - if (rc) - goto out; + ret = insert_resource(&iomem_resource, res); + if (ret) + return ret; - end = min(cur + remaining, - ars_status->address + ars_status->length); - done = end - cur; - cur += done; - remaining -= done; - } while (remaining); + ret = devm_add_action(acpi_desc->dev, acpi_nfit_remove_resource, res); + if (ret) { + remove_resource(res); + return ret; + } - out: - kfree(ars_cap); - kfree(ars_start); - kfree(ars_status); - return rc; + return 0; } static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, struct nd_mapping *nd_mapping, struct nd_region_desc *ndr_desc, struct acpi_nfit_memory_map *memdev, - struct acpi_nfit_system_address *spa) + struct nfit_spa *nfit_spa) { struct nvdimm *nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, memdev->device_handle); + struct acpi_nfit_system_address *spa = nfit_spa->spa; struct nd_blk_region_desc *ndbr_desc; struct nfit_mem *nfit_mem; int blk_valid = 0; @@ -1718,7 +1743,9 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc, ndbr_desc->enable = acpi_nfit_blk_region_enable; ndbr_desc->disable = acpi_nfit_blk_region_disable; ndbr_desc->do_io = acpi_desc->blk_do_io; - if (!nvdimm_blk_region_create(acpi_desc->nvdimm_bus, ndr_desc)) + nfit_spa->nd_region = nvdimm_blk_region_create(acpi_desc->nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) return -ENOMEM; break; } @@ -1738,7 +1765,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, struct resource res; int count = 0, rc; - if (nfit_spa->is_registered) + if (nfit_spa->nd_region) return 0; if (spa->range_index == 0) { @@ -1775,47 +1802,332 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc, } nd_mapping = &nd_mappings[count++]; rc = acpi_nfit_init_mapping(acpi_desc, nd_mapping, ndr_desc, - memdev, spa); + memdev, nfit_spa); if (rc) - return rc; + goto out; } ndr_desc->nd_mapping = nd_mappings; ndr_desc->num_mappings = count; rc = acpi_nfit_init_interleave_set(acpi_desc, ndr_desc, spa); if (rc) - return rc; + goto out; nvdimm_bus = acpi_desc->nvdimm_bus; if (nfit_spa_type(spa) == NFIT_SPA_PM) { - rc = acpi_nfit_find_poison(acpi_desc, ndr_desc); + rc = acpi_nfit_insert_resource(acpi_desc, ndr_desc); if (rc) { - dev_err(acpi_desc->dev, - "error while performing ARS to find poison: %d\n", + dev_warn(acpi_desc->dev, + "failed to insert pmem resource to iomem: %d\n", rc); - return rc; + goto out; } - if (!nvdimm_pmem_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + + nfit_spa->nd_region = nvdimm_pmem_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE) { - if (!nvdimm_volatile_region_create(nvdimm_bus, ndr_desc)) - return -ENOMEM; + nfit_spa->nd_region = nvdimm_volatile_region_create(nvdimm_bus, + ndr_desc); + if (!nfit_spa->nd_region) + rc = -ENOMEM; } - nfit_spa->is_registered = 1; + out: + if (rc) + dev_err(acpi_desc->dev, "failed to register spa range %d\n", + nfit_spa->spa->range_index); + return rc; +} + +static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc, + u32 max_ars) +{ + struct device *dev = acpi_desc->dev; + struct nd_cmd_ars_status *ars_status; + + if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) { + memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size); + return 0; + } + + if (acpi_desc->ars_status) + devm_kfree(dev, acpi_desc->ars_status); + acpi_desc->ars_status = NULL; + ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL); + if (!ars_status) + return -ENOMEM; + acpi_desc->ars_status = ars_status; + acpi_desc->ars_status_size = max_ars; return 0; } -static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) { + struct acpi_nfit_system_address *spa = nfit_spa->spa; + int rc; + + if (!nfit_spa->max_ars) { + struct nd_cmd_ars_cap ars_cap; + + memset(&ars_cap, 0, sizeof(ars_cap)); + rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa); + if (rc < 0) + return rc; + nfit_spa->max_ars = ars_cap.max_ars_out; + nfit_spa->clear_err_unit = ars_cap.clear_err_unit; + /* check that the supported scrub types match the spa type */ + if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE && + ((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0) + return -ENOTTY; + else if (nfit_spa_type(spa) == NFIT_SPA_PM && + ((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0) + return -ENOTTY; + } + + if (ars_status_alloc(acpi_desc, nfit_spa->max_ars)) + return -ENOMEM; + + rc = ars_get_status(acpi_desc); + if (rc < 0 && rc != -ENOSPC) + return rc; + + if (ars_status_process_records(acpi_desc->nvdimm_bus, + acpi_desc->ars_status)) + return -ENOMEM; + + return 0; +} + +static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc, + struct nfit_spa *nfit_spa) +{ + struct acpi_nfit_system_address *spa = nfit_spa->spa; + unsigned int overflow_retry = scrub_overflow_abort; + u64 init_ars_start = 0, init_ars_len = 0; + struct device *dev = acpi_desc->dev; + unsigned int tmo = scrub_timeout; + int rc; + + if (nfit_spa->ars_done || !nfit_spa->nd_region) + return; + + rc = ars_start(acpi_desc, nfit_spa); + /* + * If we timed out the initial scan we'll still be busy here, + * and will wait another timeout before giving up permanently. + */ + if (rc < 0 && rc != -EBUSY) + return; + + do { + u64 ars_start, ars_len; + + if (acpi_desc->cancel) + break; + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + if (rc == -ENOTTY) + break; + if (rc == -EBUSY && !tmo) { + dev_warn(dev, "range %d ars timeout, aborting\n", + spa->range_index); + break; + } + + if (rc == -EBUSY) { + /* + * Note, entries may be appended to the list + * while the lock is dropped, but the workqueue + * being active prevents entries being deleted / + * freed. + */ + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + mutex_lock(&acpi_desc->init_mutex); + continue; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + if (!init_ars_len) { + init_ars_len = acpi_desc->ars_status->length; + init_ars_start = acpi_desc->ars_status->address; + } + rc = ars_continue(acpi_desc); + } + + if (rc < 0) { + dev_warn(dev, "range %d ars continuation failed\n", + spa->range_index); + break; + } + + if (init_ars_len) { + ars_start = init_ars_start; + ars_len = init_ars_len; + } else { + ars_start = acpi_desc->ars_status->address; + ars_len = acpi_desc->ars_status->length; + } + dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n", + spa->range_index, ars_start, ars_len); + /* notify the region about new poison entries */ + nvdimm_region_notify(nfit_spa->nd_region, + NVDIMM_REVALIDATE_POISON); + break; + } while (1); +} + +static void acpi_nfit_scrub(struct work_struct *work) +{ + struct device *dev; + u64 init_scrub_length = 0; struct nfit_spa *nfit_spa; + u64 init_scrub_address = 0; + bool init_ars_done = false; + struct acpi_nfit_desc *acpi_desc; + unsigned int tmo = scrub_timeout; + unsigned int overflow_retry = scrub_overflow_abort; + + acpi_desc = container_of(work, typeof(*acpi_desc), work); + dev = acpi_desc->dev; + /* + * We scrub in 2 phases. The first phase waits for any platform + * firmware initiated scrubs to complete and then we go search for the + * affected spa regions to mark them scanned. In the second phase we + * initiate a directed scrub for every range that was not scrubbed in + * phase 1. + */ + + /* process platform firmware initiated scrubs */ + retry: + mutex_lock(&acpi_desc->init_mutex); list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { - int rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + struct nd_cmd_ars_status *ars_status; + struct acpi_nfit_system_address *spa; + u64 ars_start, ars_len; + int rc; - if (rc) - return rc; + if (acpi_desc->cancel) + break; + + if (nfit_spa->nd_region) + continue; + + if (init_ars_done) { + /* + * No need to re-query, we're now just + * reconciling all the ranges covered by the + * initial scrub + */ + rc = 0; + } else + rc = acpi_nfit_query_poison(acpi_desc, nfit_spa); + + if (rc == -ENOTTY) { + /* no ars capability, just register spa and move on */ + acpi_nfit_register_region(acpi_desc, nfit_spa); + continue; + } + + if (rc == -EBUSY && !tmo) { + /* fallthrough to directed scrub in phase 2 */ + dev_warn(dev, "timeout awaiting ars results, continuing...\n"); + break; + } else if (rc == -EBUSY) { + mutex_unlock(&acpi_desc->init_mutex); + ssleep(1); + tmo--; + goto retry; + } + + /* we got some results, but there are more pending... */ + if (rc == -ENOSPC && overflow_retry--) { + ars_status = acpi_desc->ars_status; + /* + * Record the original scrub range, so that we + * can recall all the ranges impacted by the + * initial scrub. + */ + if (!init_scrub_length) { + init_scrub_length = ars_status->length; + init_scrub_address = ars_status->address; + } + rc = ars_continue(acpi_desc); + if (rc == 0) { + mutex_unlock(&acpi_desc->init_mutex); + goto retry; + } + } + + if (rc < 0) { + /* + * Initial scrub failed, we'll give it one more + * try below... + */ + break; + } + + /* We got some final results, record completed ranges */ + ars_status = acpi_desc->ars_status; + if (init_scrub_length) { + ars_start = init_scrub_address; + ars_len = ars_start + init_scrub_length; + } else { + ars_start = ars_status->address; + ars_len = ars_status->length; + } + spa = nfit_spa->spa; + + if (!init_ars_done) { + init_ars_done = true; + dev_dbg(dev, "init scrub %#llx + %#llx complete\n", + ars_start, ars_len); + } + if (ars_start <= spa->address && ars_start + ars_len + >= spa->address + spa->length) + acpi_nfit_register_region(acpi_desc, nfit_spa); } + + /* + * For all the ranges not covered by an initial scrub we still + * want to see if there are errors, but it's ok to discover them + * asynchronously. + */ + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) { + /* + * Flag all the ranges that still need scrubbing, but + * register them now to make data available. + */ + if (nfit_spa->nd_region) + nfit_spa->ars_done = 1; + else + acpi_nfit_register_region(acpi_desc, nfit_spa); + } + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + acpi_nfit_async_scrub(acpi_desc, nfit_spa); + mutex_unlock(&acpi_desc->init_mutex); +} + +static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc) +{ + struct nfit_spa *nfit_spa; + int rc; + + list_for_each_entry(nfit_spa, &acpi_desc->spas, list) + if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) { + /* BLK regions don't need to wait for ars results */ + rc = acpi_nfit_register_region(acpi_desc, nfit_spa); + if (rc) + return rc; + } + + queue_work(nfit_wq, &acpi_desc->work); return 0; } @@ -1901,15 +2213,64 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz) } EXPORT_SYMBOL_GPL(acpi_nfit_init); -static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) +struct acpi_nfit_flush_work { + struct work_struct work; + struct completion cmp; +}; + +static void flush_probe(struct work_struct *work) { - struct nvdimm_bus_descriptor *nd_desc; - struct acpi_nfit_desc *acpi_desc; - struct device *dev = &adev->dev; + struct acpi_nfit_flush_work *flush; - acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); - if (!acpi_desc) - return ERR_PTR(-ENOMEM); + flush = container_of(work, typeof(*flush), work); + complete(&flush->cmp); +} + +static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + struct device *dev = acpi_desc->dev; + struct acpi_nfit_flush_work flush; + + /* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */ + device_lock(dev); + device_unlock(dev); + + /* + * Scrub work could take 10s of seconds, userspace may give up so we + * need to be interruptible while waiting. + */ + INIT_WORK_ONSTACK(&flush.work, flush_probe); + COMPLETION_INITIALIZER_ONSTACK(flush.cmp); + queue_work(nfit_wq, &flush.work); + return wait_for_completion_interruptible(&flush.cmp); +} + +static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc); + + if (nvdimm) + return 0; + if (cmd != ND_CMD_ARS_START) + return 0; + + /* + * The kernel and userspace may race to initiate a scrub, but + * the scrub thread is prepared to lose that initial race. It + * just needs guarantees that any ars it initiates are not + * interrupted by any intervening start reqeusts from userspace. + */ + if (work_busy(&acpi_desc->work)) + return -EBUSY; + + return 0; +} + +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev) +{ + struct nvdimm_bus_descriptor *nd_desc; dev_set_drvdata(dev, acpi_desc); acpi_desc->dev = dev; @@ -1917,14 +2278,10 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) nd_desc = &acpi_desc->nd_desc; nd_desc->provider_name = "ACPI.NFIT"; nd_desc->ndctl = acpi_nfit_ctl; + nd_desc->flush_probe = acpi_nfit_flush_probe; + nd_desc->clear_to_send = acpi_nfit_clear_to_send; nd_desc->attr_groups = acpi_nfit_attribute_groups; - acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, nd_desc); - if (!acpi_desc->nvdimm_bus) { - devm_kfree(dev, acpi_desc); - return ERR_PTR(-ENXIO); - } - INIT_LIST_HEAD(&acpi_desc->spa_maps); INIT_LIST_HEAD(&acpi_desc->spas); INIT_LIST_HEAD(&acpi_desc->dcrs); @@ -1935,9 +2292,9 @@ static struct acpi_nfit_desc *acpi_nfit_desc_init(struct acpi_device *adev) INIT_LIST_HEAD(&acpi_desc->dimms); mutex_init(&acpi_desc->spa_map_mutex); mutex_init(&acpi_desc->init_mutex); - - return acpi_desc; + INIT_WORK(&acpi_desc->work, acpi_nfit_scrub); } +EXPORT_SYMBOL_GPL(acpi_nfit_desc_init); static int acpi_nfit_add(struct acpi_device *adev) { @@ -1956,12 +2313,13 @@ static int acpi_nfit_add(struct acpi_device *adev) return 0; } - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); - return PTR_ERR(acpi_desc); - } + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) + return -ENOMEM; + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENOMEM; /* * Save the acpi header for later and then skip it, @@ -2000,6 +2358,8 @@ static int acpi_nfit_remove(struct acpi_device *adev) { struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(&adev->dev); + acpi_desc->cancel = 1; + flush_workqueue(nfit_wq); nvdimm_bus_unregister(acpi_desc->nvdimm_bus); return 0; } @@ -2024,12 +2384,19 @@ static void acpi_nfit_notify(struct acpi_device *adev, u32 event) } if (!acpi_desc) { - acpi_desc = acpi_nfit_desc_init(adev); - if (IS_ERR(acpi_desc)) { - dev_err(dev, "%s: error initializing acpi_desc: %ld\n", - __func__, PTR_ERR(acpi_desc)); + acpi_desc = devm_kzalloc(dev, sizeof(*acpi_desc), GFP_KERNEL); + if (!acpi_desc) goto out_unlock; - } + acpi_nfit_desc_init(acpi_desc, &adev->dev); + acpi_desc->nvdimm_bus = nvdimm_bus_register(dev, &acpi_desc->nd_desc); + if (!acpi_desc->nvdimm_bus) + goto out_unlock; + } else { + /* + * Finish previous registration before considering new + * regions. + */ + flush_workqueue(nfit_wq); } /* Evaluate _FIT */ @@ -2097,12 +2464,17 @@ static __init int nfit_init(void) acpi_str_to_uuid(UUID_NFIT_BUS, nfit_uuid[NFIT_DEV_BUS]); acpi_str_to_uuid(UUID_NFIT_DIMM, nfit_uuid[NFIT_DEV_DIMM]); + nfit_wq = create_singlethread_workqueue("nfit"); + if (!nfit_wq) + return -ENOMEM; + return acpi_bus_register_driver(&acpi_nfit_driver); } static __exit void nfit_exit(void) { acpi_bus_unregister_driver(&acpi_nfit_driver); + destroy_workqueue(nfit_wq); } module_init(nfit_init); diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h index 3d549a383659..c75576b2d50e 100644 --- a/drivers/acpi/nfit.h +++ b/drivers/acpi/nfit.h @@ -14,6 +14,7 @@ */ #ifndef __NFIT_H__ #define __NFIT_H__ +#include <linux/workqueue.h> #include <linux/libnvdimm.h> #include <linux/types.h> #include <linux/uuid.h> @@ -40,15 +41,32 @@ enum nfit_uuids { NFIT_UUID_MAX, }; +enum nfit_fic { + NFIT_FIC_BYTE = 0x101, /* byte-addressable energy backed */ + NFIT_FIC_BLK = 0x201, /* block-addressable non-energy backed */ + NFIT_FIC_BYTEN = 0x301, /* byte-addressable non-energy backed */ +}; + enum { - ND_BLK_READ_FLUSH = 1, - ND_BLK_DCR_LATCH = 2, + NFIT_BLK_READ_FLUSH = 1, + NFIT_BLK_DCR_LATCH = 2, + NFIT_ARS_STATUS_DONE = 0, + NFIT_ARS_STATUS_BUSY = 1 << 16, + NFIT_ARS_STATUS_NONE = 2 << 16, + NFIT_ARS_STATUS_INTR = 3 << 16, + NFIT_ARS_START_BUSY = 6, + NFIT_ARS_CAP_NONE = 1, + NFIT_ARS_F_OVERFLOW = 1, + NFIT_ARS_TIMEOUT = 90, }; struct nfit_spa { struct acpi_nfit_system_address *spa; struct list_head list; - int is_registered; + struct nd_region *nd_region; + unsigned int ars_done:1; + u32 clear_err_unit; + u32 max_ars; }; struct nfit_dcr { @@ -110,6 +128,10 @@ struct acpi_nfit_desc { struct list_head idts; struct nvdimm_bus *nvdimm_bus; struct device *dev; + struct nd_cmd_ars_status *ars_status; + size_t ars_status_size; + struct work_struct work; + unsigned int cancel:1; unsigned long dimm_dsm_force_en; unsigned long bus_dsm_force_en; int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa, @@ -182,5 +204,5 @@ static inline struct acpi_nfit_desc *to_acpi_desc( const u8 *to_nfit_uuid(enum nfit_uuids id); int acpi_nfit_init(struct acpi_nfit_desc *nfit, acpi_size sz); -extern const struct attribute_group *acpi_nfit_attribute_groups[]; +void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev); #endif /* __NFIT_H__ */ diff --git a/drivers/nvdimm/blk.c b/drivers/nvdimm/blk.c index 91a336ea8c4f..e9ff9229d942 100644 --- a/drivers/nvdimm/blk.c +++ b/drivers/nvdimm/blk.c @@ -31,8 +31,6 @@ struct nd_blk_device { u32 internal_lbasize; }; -static int nd_blk_major; - static u32 nd_blk_meta_size(struct nd_blk_device *blk_dev) { return blk_dev->nsblk->lbasize - blk_dev->sector_size; @@ -264,7 +262,6 @@ static int nd_blk_attach_disk(struct nd_namespace_common *ndns, } disk->driverfs_dev = &ndns->dev; - disk->major = nd_blk_major; disk->first_minor = 0; disk->fops = &nd_blk_fops; disk->private_data = blk_dev; @@ -358,25 +355,12 @@ static struct nd_device_driver nd_blk_driver = { static int __init nd_blk_init(void) { - int rc; - - rc = register_blkdev(0, "nd_blk"); - if (rc < 0) - return rc; - - nd_blk_major = rc; - rc = nd_driver_register(&nd_blk_driver); - - if (rc < 0) - unregister_blkdev(nd_blk_major, "nd_blk"); - - return rc; + return nd_driver_register(&nd_blk_driver); } static void __exit nd_blk_exit(void) { driver_unregister(&nd_blk_driver.drv); - unregister_blkdev(nd_blk_major, "nd_blk"); } MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>"); diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index efb2c1ceef98..c32cbb593600 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -31,8 +31,6 @@ enum log_ent_request { LOG_OLD_ENT }; -static int btt_major; - static int arena_read_bytes(struct arena_info *arena, resource_size_t offset, void *buf, size_t n) { @@ -1246,7 +1244,6 @@ static int btt_blk_init(struct btt *btt) nvdimm_namespace_disk_name(ndns, btt->btt_disk->disk_name); btt->btt_disk->driverfs_dev = &btt->nd_btt->dev; - btt->btt_disk->major = btt_major; btt->btt_disk->first_minor = 0; btt->btt_disk->fops = &btt_fops; btt->btt_disk->private_data = btt; @@ -1423,22 +1420,11 @@ EXPORT_SYMBOL(nvdimm_namespace_detach_btt); static int __init nd_btt_init(void) { - int rc; - - btt_major = register_blkdev(0, "btt"); - if (btt_major < 0) - return btt_major; + int rc = 0; debugfs_root = debugfs_create_dir("btt", NULL); - if (IS_ERR_OR_NULL(debugfs_root)) { + if (IS_ERR_OR_NULL(debugfs_root)) rc = -ENXIO; - goto err_debugfs; - } - - return 0; - - err_debugfs: - unregister_blkdev(btt_major, "btt"); return rc; } @@ -1446,7 +1432,6 @@ static int __init nd_btt_init(void) static void __exit nd_btt_exit(void) { debugfs_remove_recursive(debugfs_root); - unregister_blkdev(btt_major, "btt"); } MODULE_ALIAS_ND_DEVICE(ND_DEVICE_BTT); diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c index 5d28e9405f32..33557481d452 100644 --- a/drivers/nvdimm/bus.c +++ b/drivers/nvdimm/bus.c @@ -133,6 +133,78 @@ static int nvdimm_bus_remove(struct device *dev) return rc; } +void nd_device_notify(struct device *dev, enum nvdimm_event event) +{ + device_lock(dev); + if (dev->driver) { + struct nd_device_driver *nd_drv; + + nd_drv = to_nd_device_driver(dev->driver); + if (nd_drv->notify) + nd_drv->notify(dev, event); + } + device_unlock(dev); +} +EXPORT_SYMBOL(nd_device_notify); + +void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev); + + if (!nvdimm_bus) + return; + + /* caller is responsible for holding a reference on the device */ + nd_device_notify(&nd_region->dev, event); +} +EXPORT_SYMBOL_GPL(nvdimm_region_notify); + +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len) +{ + struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc; + struct nd_cmd_clear_error clear_err; + struct nd_cmd_ars_cap ars_cap; + u32 clear_err_unit, mask; + int cmd_rc, rc; + + if (!nvdimm_bus) + return -ENXIO; + + nd_desc = nvdimm_bus->nd_desc; + if (!nd_desc->ndctl) + return -ENXIO; + + memset(&ars_cap, 0, sizeof(ars_cap)); + ars_cap.address = phys; + ars_cap.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_CAP, &ars_cap, + sizeof(ars_cap), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + clear_err_unit = ars_cap.clear_err_unit; + if (!clear_err_unit || !is_power_of_2(clear_err_unit)) + return -ENXIO; + + mask = clear_err_unit - 1; + if ((phys | len) & mask) + return -ENXIO; + memset(&clear_err, 0, sizeof(clear_err)); + clear_err.address = phys; + clear_err.length = len; + rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_CLEAR_ERROR, &clear_err, + sizeof(clear_err), &cmd_rc); + if (rc < 0) + return rc; + if (cmd_rc < 0) + return cmd_rc; + return clear_err.cleared; +} +EXPORT_SYMBOL_GPL(nvdimm_clear_poison); + static struct bus_type nvdimm_bus_type = { .name = "nd", .uevent = nvdimm_bus_uevent, @@ -395,6 +467,12 @@ static const struct nd_cmd_desc __nd_cmd_bus_descs[] = { .out_num = 3, .out_sizes = { 4, 4, UINT_MAX, }, }, + [ND_CMD_CLEAR_ERROR] = { + .in_num = 2, + .in_sizes = { 8, 8, }, + .out_num = 3, + .out_sizes = { 4, 4, 8, }, + }, }; const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd) @@ -463,17 +541,37 @@ void wait_nvdimm_bus_probe_idle(struct device *dev) } while (true); } +static int pmem_active(struct device *dev, void *data) +{ + if (is_nd_pmem(dev) && dev->driver) + return -EBUSY; + return 0; +} + /* set_config requires an idle interleave set */ -static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd) +static int nd_cmd_clear_to_send(struct nvdimm_bus *nvdimm_bus, + struct nvdimm *nvdimm, unsigned int cmd) { - struct nvdimm_bus *nvdimm_bus; + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + + /* ask the bus provider if it would like to block this request */ + if (nd_desc->clear_to_send) { + int rc = nd_desc->clear_to_send(nd_desc, nvdimm, cmd); + + if (rc) + return rc; + } + + /* require clear error to go through the pmem driver */ + if (!nvdimm && cmd == ND_CMD_CLEAR_ERROR) + return device_for_each_child(&nvdimm_bus->dev, NULL, + pmem_active); if (!nvdimm || cmd != ND_CMD_SET_CONFIG_DATA) return 0; - nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev); + /* prevent label manipulation while the kernel owns label updates */ wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev); - if (atomic_read(&nvdimm->busy)) return -EBUSY; return 0; @@ -513,10 +611,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, /* fail write commands (when read-only) */ if (read_only) - switch (ioctl_cmd) { - case ND_IOCTL_VENDOR: - case ND_IOCTL_SET_CONFIG_DATA: - case ND_IOCTL_ARS_START: + switch (cmd) { + case ND_CMD_VENDOR: + case ND_CMD_SET_CONFIG_DATA: + case ND_CMD_ARS_START: + case ND_CMD_CLEAR_ERROR: dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n", nvdimm ? nvdimm_cmd_name(cmd) : nvdimm_bus_cmd_name(cmd)); @@ -583,11 +682,11 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, } nvdimm_bus_lock(&nvdimm_bus->dev); - rc = nd_cmd_clear_to_send(nvdimm, cmd); + rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, cmd); if (rc) goto out_unlock; - rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len); + rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len, NULL); if (rc < 0) goto out_unlock; if (copy_to_user(p, buf, buf_len)) @@ -602,14 +701,14 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm, static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { long id = (long) file->private_data; - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { if (nvdimm_bus->id == id) { - rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg); break; } } @@ -633,10 +732,10 @@ static int match_dimm(struct device *dev, void *data) static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { - int rc = -ENXIO, read_only; + int rc = -ENXIO, ro; struct nvdimm_bus *nvdimm_bus; - read_only = (O_RDWR != (file->f_flags & O_ACCMODE)); + ro = ((file->f_flags & O_ACCMODE) == O_RDONLY); mutex_lock(&nvdimm_bus_list_mutex); list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) { struct device *dev = device_find_child(&nvdimm_bus->dev, @@ -647,7 +746,7 @@ static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg) continue; nvdimm = to_nvdimm(dev); - rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg); + rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg); put_device(dev); break; } diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c index 2e2832b83c93..79646d0c3277 100644 --- a/drivers/nvdimm/core.c +++ b/drivers/nvdimm/core.c @@ -298,6 +298,15 @@ static int flush_regions_dimms(struct device *dev, void *data) static ssize_t wait_probe_show(struct device *dev, struct device_attribute *attr, char *buf) { + struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev); + struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc; + int rc; + + if (nd_desc->flush_probe) { + rc = nd_desc->flush_probe(nd_desc); + if (rc) + return rc; + } nd_synchronize(); device_for_each_child(dev, NULL, flush_regions_dimms); return sprintf(buf, "1\n"); @@ -408,33 +417,11 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len) set_badblock(bb, start_sector, num_sectors); } -/** - * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks - * @ndns: the namespace containing poison ranges - * @bb: badblocks instance to populate - * @offset: offset at the start of the namespace before 'sector 0' - * - * The poison list generated during NFIT initialization may contain multiple, - * possibly overlapping ranges in the SPA (System Physical Address) space. - * Compare each of these ranges to the namespace currently being initialized, - * and add badblocks to the gendisk for all matching sub-ranges - */ -void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, - struct badblocks *bb, resource_size_t offset) +static void namespace_add_poison(struct list_head *poison_list, + struct badblocks *bb, struct resource *res) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - struct nvdimm_bus *nvdimm_bus; - struct list_head *poison_list; - u64 ns_start, ns_end, ns_size; struct nd_poison *pl; - ns_size = nvdimm_namespace_capacity(ndns) - offset; - ns_start = nsio->res.start + offset; - ns_end = nsio->res.end; - - nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); - poison_list = &nvdimm_bus->poison_list; if (list_empty(poison_list)) return; @@ -442,37 +429,69 @@ void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, u64 pl_end = pl->start + pl->length - 1; /* Discard intervals with no intersection */ - if (pl_end < ns_start) + if (pl_end < res->start) continue; - if (pl->start > ns_end) + if (pl->start > res->end) continue; /* Deal with any overlap after start of the namespace */ - if (pl->start >= ns_start) { + if (pl->start >= res->start) { u64 start = pl->start; u64 len; - if (pl_end <= ns_end) + if (pl_end <= res->end) len = pl->length; else - len = ns_start + ns_size - pl->start; - __add_badblock_range(bb, start - ns_start, len); + len = res->start + resource_size(res) + - pl->start; + __add_badblock_range(bb, start - res->start, len); continue; } /* Deal with overlap for poison starting before the namespace */ - if (pl->start < ns_start) { + if (pl->start < res->start) { u64 len; - if (pl_end < ns_end) - len = pl->start + pl->length - ns_start; + if (pl_end < res->end) + len = pl->start + pl->length - res->start; else - len = ns_size; + len = resource_size(res); __add_badblock_range(bb, 0, len); } } } + +/** + * nvdimm_namespace_add_poison() - Convert a list of poison ranges to badblocks + * @ndns: the namespace containing poison ranges + * @bb: badblocks instance to populate + * @offset: offset at the start of the namespace before 'sector 0' + * + * The poison list generated during NFIT initialization may contain multiple, + * possibly overlapping ranges in the SPA (System Physical Address) space. + * Compare each of these ranges to the namespace currently being initialized, + * and add badblocks to the gendisk for all matching sub-ranges + */ +void nvdimm_namespace_add_poison(struct nd_namespace_common *ndns, + struct badblocks *bb, resource_size_t offset) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + struct nvdimm_bus *nvdimm_bus; + struct list_head *poison_list; + struct resource res = { + .start = nsio->res.start + offset, + .end = nsio->res.end, + }; + + nvdimm_bus = to_nvdimm_bus(nd_region->dev.parent); + poison_list = &nvdimm_bus->poison_list; + + nvdimm_bus_lock(&nvdimm_bus->dev); + namespace_add_poison(poison_list, bb, &res); + nvdimm_bus_unlock(&nvdimm_bus->dev); +} EXPORT_SYMBOL_GPL(nvdimm_namespace_add_poison); -static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; @@ -487,12 +506,12 @@ static int __add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) return 0; } -int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +static int bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) { struct nd_poison *pl; if (list_empty(&nvdimm_bus->poison_list)) - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); /* * There is a chance this is a duplicate, check for those first. @@ -512,7 +531,18 @@ int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) * as any overlapping ranges will get resolved when the list is consumed * and converted to badblocks */ - return __add_poison(nvdimm_bus, addr, length); + return add_poison(nvdimm_bus, addr, length); +} + +int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length) +{ + int rc; + + nvdimm_bus_lock(&nvdimm_bus->dev); + rc = bus_add_poison(nvdimm_bus, addr, length); + nvdimm_bus_unlock(&nvdimm_bus->dev); + + return rc; } EXPORT_SYMBOL_GPL(nvdimm_bus_add_poison); @@ -553,7 +583,11 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus) nd_synchronize(); device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister); + + nvdimm_bus_lock(&nvdimm_bus->dev); free_poison_list(&nvdimm_bus->poison_list); + nvdimm_bus_unlock(&nvdimm_bus->dev); + nvdimm_bus_destroy_ndctl(nvdimm_bus); device_unregister(&nvdimm_bus->dev); diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index 651b8d19d324..c56f88217924 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -75,7 +75,7 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd) memset(cmd, 0, sizeof(*cmd)); nd_desc = nvdimm_bus->nd_desc; return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd)); + ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd), NULL); } int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) @@ -120,7 +120,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd) cmd->in_offset = offset; rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), ND_CMD_GET_CONFIG_DATA, cmd, - cmd->in_length + sizeof(*cmd)); + cmd->in_length + sizeof(*cmd), NULL); if (rc || cmd->status) { rc = -ENXIO; break; @@ -171,7 +171,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, status = ((void *) cmd) + cmd_size - sizeof(u32); rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev), - ND_CMD_SET_CONFIG_DATA, cmd, cmd_size); + ND_CMD_SET_CONFIG_DATA, cmd, cmd_size, NULL); if (rc || *status) { rc = rc ? rc : -ENXIO; break; diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9edf7eb7d17c..f5cb88601359 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -133,6 +133,7 @@ bool nd_is_uuid_unique(struct device *dev, u8 *uuid) bool pmem_should_map_pages(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_namespace_io *nsio; if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) return false; @@ -143,6 +144,12 @@ bool pmem_should_map_pages(struct device *dev) if (is_nd_pfn(dev) || is_nd_btt(dev)) return false; + nsio = to_nd_namespace_io(dev); + if (region_intersects(nsio->res.start, resource_size(&nsio->res), + IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) + return false; + #ifdef ARCH_MEMREMAP_PMEM return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; #else diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index ba1633b9da31..1799bd97a9ce 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -18,6 +18,7 @@ #include <linux/mutex.h> #include <linux/ndctl.h> #include <linux/types.h> +#include <linux/nd.h> #include "label.h" enum { @@ -168,6 +169,7 @@ int nd_integrity_init(struct gendisk *disk, unsigned long meta_size); void wait_nvdimm_bus_probe_idle(struct device *dev); void nd_device_register(struct device *dev); void nd_device_unregister(struct device *dev, enum nd_async_mode mode); +void nd_device_notify(struct device *dev, enum nvdimm_event event); int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf, size_t len); ssize_t nd_sector_size_show(unsigned long current_lbasize, @@ -184,6 +186,8 @@ int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd); int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); +long nvdimm_clear_poison(struct device *dev, phys_addr_t phys, + unsigned int len); struct nd_btt *to_nd_btt(struct device *dev); struct nd_gen_sb { diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h index cc243754acef..8e343a3ca873 100644 --- a/drivers/nvdimm/pfn.h +++ b/drivers/nvdimm/pfn.h @@ -15,6 +15,7 @@ #define __NVDIMM_PFN_H #include <linux/types.h> +#include <linux/mmzone.h> #define PFN_SIG_LEN 16 #define PFN_SIG "NVDIMM_PFN_INFO\0" @@ -26,10 +27,28 @@ struct nd_pfn_sb { __le32 flags; __le16 version_major; __le16 version_minor; - __le64 dataoff; + __le64 dataoff; /* relative to namespace_base + start_pad */ __le64 npfns; __le32 mode; - u8 padding[4012]; + /* minor-version-1 additions for section alignment */ + __le32 start_pad; + __le32 end_trunc; + u8 padding[4004]; __le64 checksum; }; + +#ifdef CONFIG_SPARSEMEM +#define PFN_SECTION_ALIGN_DOWN(x) SECTION_ALIGN_DOWN(x) +#define PFN_SECTION_ALIGN_UP(x) SECTION_ALIGN_UP(x) +#else +/* + * In this case ZONE_DEVICE=n and we will disable 'pfn' device support, + * but we still want pmem to compile. + */ +#define PFN_SECTION_ALIGN_DOWN(x) (x) +#define PFN_SECTION_ALIGN_UP(x) (x) +#endif + +#define PHYS_SECTION_ALIGN_DOWN(x) PFN_PHYS(PFN_SECTION_ALIGN_DOWN(PHYS_PFN(x))) +#define PHYS_SECTION_ALIGN_UP(x) PFN_PHYS(PFN_SECTION_ALIGN_UP(PHYS_PFN(x))) #endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index ae81a2f1da50..254d3bc13f70 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -205,11 +205,67 @@ static ssize_t namespace_store(struct device *dev, } static DEVICE_ATTR_RW(namespace); +static ssize_t resource_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start + + start_pad + offset); + } else { + /* no address to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(resource); + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + if (dev->driver) { + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + u64 offset = __le64_to_cpu(pfn_sb->dataoff); + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + + rc = sprintf(buf, "%llu\n", (unsigned long long) + resource_size(&nsio->res) - start_pad + - end_trunc - offset); + } else { + /* no size to convey if the pfn instance is disabled */ + rc = -ENXIO; + } + device_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(size); + static struct attribute *nd_pfn_attributes[] = { &dev_attr_mode.attr, &dev_attr_namespace.attr, &dev_attr_uuid.attr, &dev_attr_align.attr, + &dev_attr_resource.attr, + &dev_attr_size.attr, NULL, }; @@ -299,6 +355,11 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn) if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) return -ENODEV; + if (__le16_to_cpu(pfn_sb->version_minor) < 1) { + pfn_sb->start_pad = 0; + pfn_sb->end_trunc = 0; + } + switch (le32_to_cpu(pfn_sb->mode)) { case PFN_MODE_RAM: case PFN_MODE_PMEM: diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 8d0b54670184..ca5721c306bb 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -43,12 +43,13 @@ struct pmem_device { phys_addr_t data_offset; u64 pfn_flags; void __pmem *virt_addr; + /* immutable base size of the namespace */ size_t size; + /* trim size when namespace capacity has been section aligned */ + u32 pfn_pad; struct badblocks bb; }; -static int pmem_major; - static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) { if (bb->count) { @@ -62,26 +63,56 @@ static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len) return false; } +static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset, + unsigned int len) +{ + struct device *dev = disk_to_dev(pmem->pmem_disk); + sector_t sector; + long cleared; + + sector = (offset - pmem->data_offset) / 512; + cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len); + + if (cleared > 0 && cleared / 512) { + dev_dbg(dev, "%s: %llx clear %ld sector%s\n", + __func__, (unsigned long long) sector, + cleared / 512, cleared / 512 > 1 ? "s" : ""); + badblocks_clear(&pmem->bb, sector, cleared / 512); + } + invalidate_pmem(pmem->virt_addr + offset, len); +} + static int pmem_do_bvec(struct pmem_device *pmem, struct page *page, unsigned int len, unsigned int off, int rw, sector_t sector) { + int rc = 0; + bool bad_pmem = false; void *mem = kmap_atomic(page); phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + bad_pmem = true; + if (rw == READ) { - if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) - return -EIO; - memcpy_from_pmem(mem + off, pmem_addr, len); - flush_dcache_page(page); + if (unlikely(bad_pmem)) + rc = -EIO; + else { + memcpy_from_pmem(mem + off, pmem_addr, len); + flush_dcache_page(page); + } } else { flush_dcache_page(page); memcpy_to_pmem(pmem_addr, mem + off, len); + if (unlikely(bad_pmem)) { + pmem_clear_poison(pmem, pmem_off, len); + memcpy_to_pmem(pmem_addr, mem + off, len); + } } kunmap_atomic(mem); - return 0; + return rc; } static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) @@ -145,7 +176,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, *kaddr = pmem->virt_addr + offset; *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags); - return pmem->size - offset; + return pmem->size - pmem->pfn_pad - offset; } static const struct block_device_operations pmem_fops = { @@ -228,15 +259,14 @@ static int pmem_attach_disk(struct device *dev, return -ENOMEM; } - disk->major = pmem_major; - disk->first_minor = 0; disk->fops = &pmem_fops; disk->private_data = pmem; disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); disk->driverfs_dev = dev; - set_capacity(disk, (pmem->size - pmem->data_offset) / 512); + set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset) + / 512); pmem->pmem_disk = disk; devm_exit_badblocks(dev, &pmem->bb); if (devm_init_badblocks(dev, &pmem->bb)) @@ -279,6 +309,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = 0, end_trunc = 0; + resource_size_t start, size; + struct nd_namespace_io *nsio; struct nd_region *nd_region; unsigned long npfns; phys_addr_t offset; @@ -304,21 +337,56 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) } memset(pfn_sb, 0, sizeof(*pfn_sb)); - npfns = (pmem->size - SZ_8K) / SZ_4K; + + /* + * Check if pmem collides with 'System RAM' when section aligned and + * trim it accordingly + */ + nsio = to_nd_namespace_io(&ndns->dev); + start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start); + size = resource_size(&nsio->res); + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + + start = nsio->res.start; + start_pad = PHYS_SECTION_ALIGN_UP(start) - start; + } + + start = nsio->res.start; + size = PHYS_SECTION_ALIGN_UP(start + size) - start; + if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM, + IORES_DESC_NONE) == REGION_MIXED) { + size = resource_size(&nsio->res); + end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size); + } + + if (start_pad + end_trunc) + dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n", + dev_name(&ndns->dev), start_pad + end_trunc); + /* * Note, we use 64 here for the standard size of struct page, * debugging options may cause it to be larger in which case the * implementation will limit the pfns advertised through * ->direct_access() to those that are included in the memmap. */ + start += start_pad; + npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K; if (nd_pfn->mode == PFN_MODE_PMEM) - offset = ALIGN(SZ_8K + 64 * npfns, nd_pfn->align); + offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align) + - start; else if (nd_pfn->mode == PFN_MODE_RAM) - offset = ALIGN(SZ_8K, nd_pfn->align); + offset = ALIGN(start + SZ_8K, nd_pfn->align) - start; else goto err; - npfns = (pmem->size - offset) / SZ_4K; + if (offset + start_pad + end_trunc >= pmem->size) { + dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", + dev_name(&ndns->dev)); + goto err; + } + + npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K; pfn_sb->mode = cpu_to_le32(nd_pfn->mode); pfn_sb->dataoff = cpu_to_le64(offset); pfn_sb->npfns = cpu_to_le64(npfns); @@ -326,6 +394,9 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn) memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); pfn_sb->version_major = cpu_to_le16(1); + pfn_sb->version_minor = cpu_to_le16(1); + pfn_sb->start_pad = cpu_to_le32(start_pad); + pfn_sb->end_trunc = cpu_to_le32(end_trunc); checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); pfn_sb->checksum = cpu_to_le64(checksum); @@ -356,41 +427,56 @@ static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) return 0; } -static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +/* + * We hotplug memory at section granularity, pad the reserved area from + * the previous section base to the namespace base address. + */ +static unsigned long init_altmap_base(resource_size_t base) +{ + unsigned long base_pfn = PHYS_PFN(base); + + return PFN_SECTION_ALIGN_DOWN(base_pfn); +} + +static unsigned long init_altmap_reserve(resource_size_t base) +{ + unsigned long reserve = PHYS_PFN(SZ_8K); + unsigned long base_pfn = PHYS_PFN(base); + + reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn); + return reserve; +} + +static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn) { - struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); - struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); - struct device *dev = &nd_pfn->dev; - struct nd_region *nd_region; - struct vmem_altmap *altmap; - struct nd_pfn_sb *pfn_sb; - struct pmem_device *pmem; - struct request_queue *q; - phys_addr_t offset; int rc; + struct resource res; + struct request_queue *q; + struct pmem_device *pmem; + struct vmem_altmap *altmap; + struct device *dev = &nd_pfn->dev; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_common *ndns = nd_pfn->ndns; + u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); + u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + resource_size_t base = nsio->res.start + start_pad; struct vmem_altmap __altmap = { - .base_pfn = __phys_to_pfn(nsio->res.start), - .reserve = __phys_to_pfn(SZ_8K), + .base_pfn = init_altmap_base(base), + .reserve = init_altmap_reserve(base), }; - if (!nd_pfn->uuid || !nd_pfn->ndns) - return -ENODEV; - - nd_region = to_nd_region(dev->parent); - rc = nd_pfn_init(nd_pfn); - if (rc) - return rc; - - pfn_sb = nd_pfn->pfn_sb; - offset = le64_to_cpu(pfn_sb->dataoff); + pmem = dev_get_drvdata(dev); + pmem->data_offset = le64_to_cpu(pfn_sb->dataoff); + pmem->pfn_pad = start_pad + end_trunc; nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); if (nd_pfn->mode == PFN_MODE_RAM) { - if (offset < SZ_8K) + if (pmem->data_offset < SZ_8K) return -EINVAL; nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); altmap = NULL; } else if (nd_pfn->mode == PFN_MODE_PMEM) { - nd_pfn->npfns = (resource_size(&nsio->res) - offset) + nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset) / PAGE_SIZE; if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) dev_info(&nd_pfn->dev, @@ -398,7 +484,7 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) le64_to_cpu(nd_pfn->pfn_sb->npfns), nd_pfn->npfns); altmap = & __altmap; - altmap->free = __phys_to_pfn(offset - SZ_8K); + altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K); altmap->alloc = 0; } else { rc = -ENXIO; @@ -406,10 +492,12 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* establish pfn range for lookup, and switch to direct map */ - pmem = dev_get_drvdata(dev); q = pmem->pmem_queue; + memcpy(&res, &nsio->res, sizeof(res)); + res.start += start_pad; + res.end -= end_trunc; devm_memunmap(dev, (void __force *) pmem->virt_addr); - pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &nsio->res, + pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res, &q->q_usage_counter, altmap); pmem->pfn_flags |= PFN_MAP; if (IS_ERR(pmem->virt_addr)) { @@ -418,7 +506,6 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) } /* attach pmem disk in "pfn-mode" */ - pmem->data_offset = offset; rc = pmem_attach_disk(dev, ndns, pmem); if (rc) goto err; @@ -427,6 +514,22 @@ static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) err: nvdimm_namespace_detach_pfn(ndns); return rc; + +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + /* we need a valid pfn_sb before we can init a vmem_altmap */ + return __nvdimm_namespace_attach_pfn(nd_pfn); } static int nd_pmem_probe(struct device *dev) @@ -488,12 +591,27 @@ static int nd_pmem_remove(struct device *dev) return 0; } +static void nd_pmem_notify(struct device *dev, enum nvdimm_event event) +{ + struct pmem_device *pmem = dev_get_drvdata(dev); + struct nd_namespace_common *ndns = pmem->ndns; + + if (event != NVDIMM_REVALIDATE_POISON) + return; + + if (is_nd_btt(dev)) + nvdimm_namespace_add_poison(ndns, &pmem->bb, 0); + else + nvdimm_namespace_add_poison(ndns, &pmem->bb, pmem->data_offset); +} + MODULE_ALIAS("pmem"); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO); MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM); static struct nd_device_driver nd_pmem_driver = { .probe = nd_pmem_probe, .remove = nd_pmem_remove, + .notify = nd_pmem_notify, .drv = { .name = "nd_pmem", }, @@ -502,26 +620,13 @@ static struct nd_device_driver nd_pmem_driver = { static int __init pmem_init(void) { - int error; - - pmem_major = register_blkdev(0, "pmem"); - if (pmem_major < 0) - return pmem_major; - - error = nd_driver_register(&nd_pmem_driver); - if (error) { - unregister_blkdev(pmem_major, "pmem"); - return error; - } - - return 0; + return nd_driver_register(&nd_pmem_driver); } module_init(pmem_init); static void pmem_exit(void) { driver_unregister(&nd_pmem_driver.drv); - unregister_blkdev(pmem_major, "pmem"); } module_exit(pmem_exit); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index 7da63eac78ee..4b7715e29cff 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -93,9 +93,21 @@ static int nd_region_remove(struct device *dev) return 0; } +static int child_notify(struct device *dev, void *data) +{ + nd_device_notify(dev, *(enum nvdimm_event *) data); + return 0; +} + +static void nd_region_notify(struct device *dev, enum nvdimm_event event) +{ + device_for_each_child(dev, &event, child_notify); +} + static struct nd_device_driver nd_region_driver = { .probe = nd_region_probe, .remove = nd_region_remove, + .notify = nd_region_notify, .drv = { .name = "nd_region", }, |