From 5e32940621eb62064d98f42c9889db71b0368bde Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 11 Jul 2015 10:02:46 -0400 Subject: libnvdimm, btt: sparse fix Fix: drivers/nvdimm/btt.c:635:29: warning: restricted __le64 degrades to integer Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 411c7b2bb37a..552f1c4f4dc6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -632,8 +632,9 @@ static void parse_arena_meta(struct arena_info *arena, struct btt_sb *super, arena->logoff = arena_off + le64_to_cpu(super->logoff); arena->info2off = arena_off + le64_to_cpu(super->info2off); - arena->size = (super->nextoff > 0) ? (le64_to_cpu(super->nextoff)) : - (arena->info2off - arena->infooff + BTT_PG_SIZE); + arena->size = (le64_to_cpu(super->nextoff) > 0) + ? (le64_to_cpu(super->nextoff)) + : (arena->info2off - arena->infooff + BTT_PG_SIZE); arena->flags = le32_to_cpu(super->flags); } -- cgit v1.2.3-70-g09d2 From 6b47496a6fc81816e7edaf8224dfb88e402a05f5 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Thu, 23 Jul 2015 11:58:48 -0600 Subject: libnvdimm, pmem: Change pmem physical sector size to PAGE_SIZE Based on a patch: c8fa317 brd: Request from fdisk 4k alignment by Boaz Harrosh, allow fdisk to create properly aligned partitions for DAX. This will also cause mkfs.ext4 to emit a warning if using a file system block size of less than PAGE_SIZE. Cc: Dan Williams Cc: Ross Zwisler Cc: Matthew Wilcox Cc: Christoph Hellwig Cc: Elliott, Robert Signed-off-by: Vishal Verma Acked-by: Boaz Harrosh Acked-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index ade9eb917a4d..bcf48f133443 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -162,6 +162,7 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, return -ENOMEM; blk_queue_make_request(pmem->pmem_queue, pmem_make_request); + blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE); blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX); blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY); queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue); -- cgit v1.2.3-70-g09d2 From f6ef5a2a50816b58e3126206de13d0b9fdf89df5 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 28 Jul 2015 12:27:01 -0700 Subject: nvdimm: fix inline function return type warning Fix multiple build warnings when CONFIG_BTT is not enabled: In file included from ../drivers/nvdimm/bus.c:29:0: ../drivers/nvdimm/nd.h:169:15: warning: return type defaults to 'int' [-Wreturn-type] static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) ^ Signed-off-by: Randy Dunlap Cc: Dan Williams Cc: linux-nvdimm@lists.01.org Signed-off-by: Dan Williams --- drivers/nvdimm/nd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index c41f53e74277..835263e47bb8 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -166,7 +166,7 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_btt(struct device *dev); struct device *nd_btt_create(struct nd_region *nd_region); #else -static inline nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) +static inline int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) { return -ENODEV; } -- cgit v1.2.3-70-g09d2 From fbde1414acc0440024083bf0c391b259bcfc4826 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 29 Jul 2015 14:58:07 -0600 Subject: libnvdimm, btt: clean up internal interfaces Consolidate the parameters passed to arena_is_valid into just nd_btt, and an info block to increase re-usability. Similarly, btt_arena_write_layout doesn't need to be passed a uuid, as it can be obtained from arena->nd_btt. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 552f1c4f4dc6..87e6a96a6c19 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -585,12 +585,11 @@ static void free_arenas(struct btt *btt) /* * This function checks if the metadata layout is valid and error free */ -static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, - u8 *uuid, u32 lbasize) +static int arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) { u64 checksum; - if (memcmp(super->uuid, uuid, 16)) + if (memcmp(super->uuid, nd_btt->uuid, 16)) return 0; checksum = le64_to_cpu(super->checksum); @@ -599,12 +598,12 @@ static int arena_is_valid(struct arena_info *arena, struct btt_sb *super, return 0; super->checksum = cpu_to_le64(checksum); - if (lbasize != le32_to_cpu(super->external_lbasize)) + if (nd_btt->lbasize != le32_to_cpu(super->external_lbasize)) return 0; /* TODO: figure out action for this */ if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) - dev_info(to_dev(arena), "Found arena with an error flag\n"); + dev_info(&nd_btt->dev, "Found arena with an error flag\n"); return 1; } @@ -666,8 +665,7 @@ static int discover_arenas(struct btt *btt) if (ret) goto out; - if (!arena_is_valid(arena, super, btt->nd_btt->uuid, - btt->lbasize)) { + if (!arena_is_valid(btt->nd_btt, super)) { if (remaining == btt->rawsize) { btt->init_state = INIT_NOTFOUND; dev_info(to_dev(arena), "No existing arenas\n"); @@ -756,10 +754,11 @@ static int create_arenas(struct btt *btt) * It is only called for an uninitialized arena when a write * to that arena occurs for the first time. */ -static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) +static int btt_arena_write_layout(struct arena_info *arena) { int ret; struct btt_sb *super; + struct nd_btt *nd_btt = arena->nd_btt; ret = btt_map_init(arena); if (ret) @@ -774,7 +773,7 @@ static int btt_arena_write_layout(struct arena_info *arena, u8 *uuid) return -ENOMEM; strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); - memcpy(super->uuid, uuid, 16); + memcpy(super->uuid, nd_btt->uuid, 16); super->flags = cpu_to_le32(arena->flags); super->version_major = cpu_to_le16(arena->version_major); super->version_minor = cpu_to_le16(arena->version_minor); @@ -814,7 +813,7 @@ static int btt_meta_init(struct btt *btt) mutex_lock(&btt->init_lock); list_for_each_entry(arena, &btt->arena_list, list) { - ret = btt_arena_write_layout(arena, btt->nd_btt->uuid); + ret = btt_arena_write_layout(arena); if (ret) goto unlock; -- cgit v1.2.3-70-g09d2 From ab45e7632717b811e0786e46ca5ad279cb731b66 Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 29 Jul 2015 14:58:08 -0600 Subject: libnvdimm, btt: consolidate arena validation Use arena_is_valid as a common routine for checking the validity of an info block from both discover_arenas, and nd_btt_probe. As a result, don't check for validity of the BTT's UUID, and lbasize. The checksum in the BTT info block guarantees self-consistency, and when we're called from nd_btt_probe, we don't have a valid uuid or lbasize available to check against. Also cleanup to return a bool instead of an int. Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 28 +--------------------------- drivers/nvdimm/btt.h | 3 +++ drivers/nvdimm/btt_devs.c | 42 +++++++++++++++++++++++++++++++++--------- 3 files changed, 37 insertions(+), 36 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 87e6a96a6c19..6567746aa315 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -582,32 +582,6 @@ static void free_arenas(struct btt *btt) } } -/* - * This function checks if the metadata layout is valid and error free - */ -static int arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) -{ - u64 checksum; - - if (memcmp(super->uuid, nd_btt->uuid, 16)) - return 0; - - checksum = le64_to_cpu(super->checksum); - super->checksum = 0; - if (checksum != nd_btt_sb_checksum(super)) - return 0; - super->checksum = cpu_to_le64(checksum); - - if (nd_btt->lbasize != le32_to_cpu(super->external_lbasize)) - return 0; - - /* TODO: figure out action for this */ - if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) - dev_info(&nd_btt->dev, "Found arena with an error flag\n"); - - return 1; -} - /* * This function reads an existing valid btt superblock and * populates the corresponding arena_info struct @@ -665,7 +639,7 @@ static int discover_arenas(struct btt *btt) if (ret) goto out; - if (!arena_is_valid(btt->nd_btt, super)) { + if (!nd_btt_arena_is_valid(btt->nd_btt, super)) { if (remaining == btt->rawsize) { btt->init_state = INIT_NOTFOUND; dev_info(to_dev(arena), "No existing arenas\n"); diff --git a/drivers/nvdimm/btt.h b/drivers/nvdimm/btt.h index 75b0d80a6bd9..b2f8651e5395 100644 --- a/drivers/nvdimm/btt.h +++ b/drivers/nvdimm/btt.h @@ -182,4 +182,7 @@ struct btt { int init_state; int num_arenas; }; + +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super); + #endif diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 6ac8c0fea3ec..18e0663e922c 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -342,6 +342,38 @@ struct device *nd_btt_create(struct nd_region *nd_region) return dev; } +/** + * nd_btt_arena_is_valid - check if the metadata layout is valid + * @nd_btt: device with BTT geometry and backing device info + * @super: pointer to the arena's info block being tested + * + * Check consistency of the btt info block with itself by validating + * the checksum. + * + * Returns: + * false for an invalid info block, true for a valid one + */ +bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) +{ + u64 checksum; + + if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0) + return false; + + checksum = le64_to_cpu(super->checksum); + super->checksum = 0; + if (checksum != nd_btt_sb_checksum(super)) + return false; + super->checksum = cpu_to_le64(checksum); + + /* TODO: figure out action for this */ + if ((le32_to_cpu(super->flags) & IB_FLAG_ERROR_MASK) != 0) + dev_info(&nd_btt->dev, "Found arena with an error flag\n"); + + return true; +} +EXPORT_SYMBOL(nd_btt_arena_is_valid); + /* * nd_btt_sb_checksum: compute checksum for btt info block * @@ -364,8 +396,6 @@ EXPORT_SYMBOL(nd_btt_sb_checksum); static int __nd_btt_probe(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, struct btt_sb *btt_sb) { - u64 checksum; - if (!btt_sb || !ndns || !nd_btt) return -ENODEV; @@ -375,14 +405,8 @@ static int __nd_btt_probe(struct nd_btt *nd_btt, if (nvdimm_namespace_capacity(ndns) < SZ_16M) return -ENXIO; - if (memcmp(btt_sb->signature, BTT_SIG, BTT_SIG_LEN) != 0) - return -ENODEV; - - checksum = le64_to_cpu(btt_sb->checksum); - btt_sb->checksum = 0; - if (checksum != nd_btt_sb_checksum(btt_sb)) + if (!nd_btt_arena_is_valid(nd_btt, btt_sb)) return -ENODEV; - btt_sb->checksum = cpu_to_le64(checksum); nd_btt->lbasize = le32_to_cpu(btt_sb->external_lbasize); nd_btt->uuid = kmemdup(btt_sb->uuid, 16, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 6ec689542b5bc516187917d49b112847dfb75b0b Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 29 Jul 2015 14:58:09 -0600 Subject: libnvdimm, btt: write and validate parent_uuid When a BTT is instantiated on a namespace it must validate the namespace uuid matches the 'parent_uuid' stored in the btt superblock. This property enforces that changing the namespace UUID invalidates all former BTT instances on that storage. For "IO namespaces" that don't have a label or UUID, the parent_uuid is set to zero, and this validation is skipped. For such cases, old BTTs have to be invalidated by forcing the namespace to raw mode, and overwriting the BTT info blocks. Based on a patch by Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- drivers/nvdimm/btt.c | 2 ++ drivers/nvdimm/btt_devs.c | 15 ++++++++++++++- drivers/nvdimm/namespace_devs.c | 20 ++++++++++++++++++++ drivers/nvdimm/nd.h | 1 + 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 6567746aa315..19588291550b 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -733,6 +733,7 @@ static int btt_arena_write_layout(struct arena_info *arena) int ret; struct btt_sb *super; struct nd_btt *nd_btt = arena->nd_btt; + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); ret = btt_map_init(arena); if (ret) @@ -748,6 +749,7 @@ static int btt_arena_write_layout(struct arena_info *arena) strncpy(super->signature, BTT_SIG, BTT_SIG_LEN); memcpy(super->uuid, nd_btt->uuid, 16); + memcpy(super->parent_uuid, parent_uuid, 16); super->flags = cpu_to_le32(arena->flags); super->version_major = cpu_to_le16(arena->version_major); super->version_minor = cpu_to_le16(arena->version_minor); diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 18e0663e922c..242ae1c550ad 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -342,24 +342,37 @@ struct device *nd_btt_create(struct nd_region *nd_region) return dev; } +static bool uuid_is_null(u8 *uuid) +{ + static const u8 null_uuid[16]; + + return (memcmp(uuid, null_uuid, 16) == 0); +} + /** * nd_btt_arena_is_valid - check if the metadata layout is valid * @nd_btt: device with BTT geometry and backing device info * @super: pointer to the arena's info block being tested * * Check consistency of the btt info block with itself by validating - * the checksum. + * the checksum, and with the parent namespace by verifying the + * parent_uuid contained in the info block with the one supplied in. * * Returns: * false for an invalid info block, true for a valid one */ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) { + const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); u64 checksum; if (memcmp(super->signature, BTT_SIG, BTT_SIG_LEN) != 0) return false; + if (!uuid_is_null(super->parent_uuid)) + if (memcmp(super->parent_uuid, parent_uuid, 16) != 0) + return false; + checksum = le64_to_cpu(super->checksum); super->checksum = 0; if (checksum != nd_btt_sb_checksum(super)) diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index fef0dd80d4ad..b18ffea9d85b 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -100,6 +100,26 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, } EXPORT_SYMBOL(nvdimm_namespace_disk_name); +const u8 *nd_dev_to_uuid(struct device *dev) +{ + static const u8 null_uuid[16]; + + if (!dev) + return null_uuid; + + if (is_namespace_pmem(dev)) { + struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev); + + return nspm->uuid; + } else if (is_namespace_blk(dev)) { + struct nd_namespace_blk *nsblk = to_nd_namespace_blk(dev); + + return nsblk->uuid; + } else + return null_uuid; +} +EXPORT_SYMBOL(nd_dev_to_uuid); + static ssize_t nstype_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 835263e47bb8..f9615824947b 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -217,4 +217,5 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) } void nd_iostat_end(struct bio *bio, unsigned long start); resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); +const u8 *nd_dev_to_uuid(struct device *dev); #endif /* __ND_H__ */ -- cgit v1.2.3-70-g09d2 From 708ab62bef1ed3a3cf065a4138bd87f5d083cfeb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Aug 2015 23:07:08 -0400 Subject: pmem: switch to devm_ allocations Signed-off-by: Christoph Hellwig [djbw: tools/testing/nvdimm/ and memunmap_pmem support] Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/pmem.c | 36 ++++++++++-------------------- include/linux/pmem.h | 14 +++++++----- tools/testing/nvdimm/Kbuild | 4 ++-- tools/testing/nvdimm/test/iomap.c | 46 ++++++++++++++++++++++----------------- 4 files changed, 47 insertions(+), 53 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index bcf48f133443..eb7552d939e1 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -119,7 +119,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, { struct pmem_device *pmem; - pmem = kzalloc(sizeof(*pmem), GFP_KERNEL); + pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL); if (!pmem) return ERR_PTR(-ENOMEM); @@ -128,19 +128,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, if (!arch_has_pmem_api()) dev_warn(dev, "unable to guarantee persistence of writes\n"); - if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) { + if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, + dev_name(dev))) { dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n", &pmem->phys_addr, pmem->size); - kfree(pmem); return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) { - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, pmem->size); + if (!pmem->virt_addr) return ERR_PTR(-ENXIO); - } return pmem; } @@ -210,20 +207,12 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } -static void pmem_free(struct pmem_device *pmem) -{ - memunmap_pmem(pmem->virt_addr); - release_mem_region(pmem->phys_addr, pmem->size); - kfree(pmem); -} - static int nd_pmem_probe(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_namespace_common *ndns; struct nd_namespace_io *nsio; struct pmem_device *pmem; - int rc; ndns = nvdimm_namespace_common_probe(dev); if (IS_ERR(ndns)) @@ -236,16 +225,14 @@ static int nd_pmem_probe(struct device *dev) dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; + if (is_nd_btt(dev)) - rc = nvdimm_namespace_attach_btt(ndns); - else if (nd_btt_probe(ndns, pmem) == 0) { + return nvdimm_namespace_attach_btt(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) /* we'll come back as btt-pmem */ - rc = -ENXIO; - } else - rc = pmem_attach_disk(ndns, pmem); - if (rc) - pmem_free(pmem); - return rc; + return -ENXIO; + return pmem_attach_disk(ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -256,7 +243,6 @@ static int nd_pmem_remove(struct device *dev) nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); else pmem_detach_disk(pmem); - pmem_free(pmem); return 0; } diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 093c35ecefcc..20c367cd76e6 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -46,9 +46,9 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(void __pmem *addr) +static inline void memunmap_pmem(struct device *dev, void __pmem *addr) { - memunmap((void __force *) addr); + devm_memunmap(dev, (void __force *) addr); } /** @@ -97,13 +97,15 @@ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, * wmb_pmem() arrange for the data to be written through the * cache to persistent media. */ -static inline void __pmem *memremap_pmem(resource_size_t offset, - unsigned long size) +static inline void __pmem *memremap_pmem(struct device *dev, + resource_size_t offset, unsigned long size) { #ifdef ARCH_MEMREMAP_PMEM - return (void __pmem *) memremap(offset, size, ARCH_MEMREMAP_PMEM); + return (void __pmem *) devm_memremap(dev, offset, size, + ARCH_MEMREMAP_PMEM); #else - return (void __pmem *) memremap(offset, size, MEMREMAP_WT); + return (void __pmem *) devm_memremap(dev, offset, size, + MEMREMAP_WT); #endif } diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 8032a49f7873..04c5fc09576d 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -1,9 +1,9 @@ ldflags-y += --wrap=ioremap_wc ldflags-y += --wrap=devm_ioremap_nocache -ldflags-y += --wrap=memremap -ldflags-y += --wrap=memunmap +ldflags-y += --wrap=devm_memremap ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap +ldflags-y += --wrap=__devm_request_region ldflags-y += --wrap=__request_region ldflags-y += --wrap=__release_region diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 21288f34a5ca..ff1e00458864 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -80,8 +80,8 @@ void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, } EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); -void *__wrap_memremap(resource_size_t offset, size_t size, - unsigned long flags) +void *__wrap_devm_memremap(struct device *dev, resource_size_t offset, + size_t size, unsigned long flags) { struct nfit_test_resource *nfit_res; @@ -91,9 +91,9 @@ void *__wrap_memremap(resource_size_t offset, size_t size, if (nfit_res) return (void __iomem *) nfit_res->buf + offset - nfit_res->res->start; - return memremap(offset, size, flags); + return devm_memremap(dev, offset, size, flags); } -EXPORT_SYMBOL(__wrap_memremap); +EXPORT_SYMBOL(__wrap_devm_memremap); void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) { @@ -120,22 +120,9 @@ void __wrap_iounmap(volatile void __iomem *addr) } EXPORT_SYMBOL(__wrap_iounmap); -void __wrap_memunmap(void *addr) -{ - struct nfit_test_resource *nfit_res; - - rcu_read_lock(); - nfit_res = get_nfit_res((unsigned long) addr); - rcu_read_unlock(); - if (nfit_res) - return; - return memunmap(addr); -} -EXPORT_SYMBOL(__wrap_memunmap); - -struct resource *__wrap___request_region(struct resource *parent, - resource_size_t start, resource_size_t n, const char *name, - int flags) +static struct resource *nfit_test_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name, int flags) { struct nfit_test_resource *nfit_res; @@ -163,10 +150,29 @@ struct resource *__wrap___request_region(struct resource *parent, return res; } } + if (dev) + return __devm_request_region(dev, parent, start, n, name); return __request_region(parent, start, n, name, flags); } + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + return nfit_test_request_region(NULL, parent, start, n, name, flags); +} EXPORT_SYMBOL(__wrap___request_region); +struct resource *__wrap___devm_request_region(struct device *dev, + struct resource *parent, resource_size_t start, + resource_size_t n, const char *name) +{ + if (!dev) + return NULL; + return nfit_test_request_region(dev, parent, start, n, name, 0); +} +EXPORT_SYMBOL(__wrap___devm_request_region); + void __wrap___release_region(struct resource *parent, resource_size_t start, resource_size_t n) { -- cgit v1.2.3-70-g09d2 From 7a67832c7e44c20935c5d6f2264035a0f7bf0d8f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 19 Aug 2015 00:34:34 -0400 Subject: libnvdimm, e820: make CONFIG_X86_PMEM_LEGACY a tristate option We currently register a platform device for e820 type-12 memory and register a nvdimm bus beneath it. Registering the platform device triggers the device-core machinery to probe for a driver, but that search currently comes up empty. Building the nvdimm-bus registration into the e820_pmem platform device registration in this way forces libnvdimm to be built-in. Instead, convert the built-in portion of CONFIG_X86_PMEM_LEGACY to simply register a platform device and move the rest of the logic to the driver for e820_pmem, for the following reasons: 1/ Letting e820_pmem support be a module allows building and testing libnvdimm.ko changes without rebooting 2/ All the normal policy around modules can be applied to e820_pmem (unbind to disable and/or blacklisting the module from loading by default) 3/ Moving the driver to a generic location and converting it to scan "iomem_resource" rather than "e820.map" means any other architecture can take advantage of this simple nvdimm resource discovery mechanism by registering a resource named "Persistent Memory (legacy)" Cc: Christoph Hellwig Signed-off-by: Dan Williams --- arch/x86/Kconfig | 6 ++- arch/x86/include/uapi/asm/e820.h | 2 +- arch/x86/kernel/Makefile | 2 +- arch/x86/kernel/pmem.c | 79 ++++-------------------------------- drivers/nvdimm/Makefile | 3 ++ drivers/nvdimm/e820.c | 86 ++++++++++++++++++++++++++++++++++++++++ tools/testing/nvdimm/Kbuild | 4 ++ 7 files changed, 108 insertions(+), 74 deletions(-) create mode 100644 drivers/nvdimm/e820.c (limited to 'drivers/nvdimm') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b3a1a5d77d92..76c61154ed50 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1426,10 +1426,14 @@ config ILLEGAL_POINTER_VALUE source "mm/Kconfig" +config X86_PMEM_LEGACY_DEVICE + bool + config X86_PMEM_LEGACY - bool "Support non-standard NVDIMMs and ADR protected memory" + tristate "Support non-standard NVDIMMs and ADR protected memory" depends on PHYS_ADDR_T_64BIT depends on BLK_DEV + select X86_PMEM_LEGACY_DEVICE select LIBNVDIMM help Treat memory marked using the non-standard e820 type of 12 as used diff --git a/arch/x86/include/uapi/asm/e820.h b/arch/x86/include/uapi/asm/e820.h index 0f457e6eab18..9dafe59cf6e2 100644 --- a/arch/x86/include/uapi/asm/e820.h +++ b/arch/x86/include/uapi/asm/e820.h @@ -37,7 +37,7 @@ /* * This is a non-standardized way to represent ADR or NVDIMM regions that * persist over a reboot. The kernel will ignore their special capabilities - * unless the CONFIG_X86_PMEM_LEGACY=y option is set. + * unless the CONFIG_X86_PMEM_LEGACY option is set. * * ( Note that older platforms also used 6 for the same type of memory, * but newer versions switched to 12 as 6 was assigned differently. Some diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 0f15af41bd80..ac2bb7e28ba2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -92,7 +92,7 @@ obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o -obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o +obj-$(CONFIG_X86_PMEM_LEGACY_DEVICE) += pmem.o obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 64f90f53bb85..4f00b63d7ff3 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -3,80 +3,17 @@ * Copyright (c) 2015, Intel Corporation. */ #include -#include #include -#include - -static void e820_pmem_release(struct device *dev) -{ - struct nvdimm_bus *nvdimm_bus = dev->platform_data; - - if (nvdimm_bus) - nvdimm_bus_unregister(nvdimm_bus); -} - -static struct platform_device e820_pmem = { - .name = "e820_pmem", - .id = -1, - .dev = { - .release = e820_pmem_release, - }, -}; - -static const struct attribute_group *e820_pmem_attribute_groups[] = { - &nvdimm_bus_attribute_group, - NULL, -}; - -static const struct attribute_group *e820_pmem_region_attribute_groups[] = { - &nd_region_attribute_group, - &nd_device_attribute_group, - NULL, -}; static __init int register_e820_pmem(void) { - static struct nvdimm_bus_descriptor nd_desc; - struct device *dev = &e820_pmem.dev; - struct nvdimm_bus *nvdimm_bus; - int rc, i; - - rc = platform_device_register(&e820_pmem); - if (rc) - return rc; - - nd_desc.attr_groups = e820_pmem_attribute_groups; - nd_desc.provider_name = "e820"; - nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); - if (!nvdimm_bus) - goto err; - dev->platform_data = nvdimm_bus; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - struct resource res = { - .flags = IORESOURCE_MEM, - .start = ei->addr, - .end = ei->addr + ei->size - 1, - }; - struct nd_region_desc ndr_desc; - - if (ei->type != E820_PRAM) - continue; - - memset(&ndr_desc, 0, sizeof(ndr_desc)); - ndr_desc.res = &res; - ndr_desc.attr_groups = e820_pmem_region_attribute_groups; - ndr_desc.numa_node = NUMA_NO_NODE; - if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) - goto err; - } - - return 0; - - err: - dev_err(dev, "failed to register legacy persistent memory ranges\n"); - platform_device_unregister(&e820_pmem); - return -ENXIO; + struct platform_device *pdev; + + /* + * See drivers/nvdimm/e820.c for the implementation, this is + * simply here to trigger the module to load on demand. + */ + pdev = platform_device_alloc("e820_pmem", -1); + return platform_device_add(pdev); } device_initcall(register_e820_pmem); diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 594bb97c867a..9bf15db52dee 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -2,6 +2,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o nd_pmem-y := pmem.o @@ -9,6 +10,8 @@ nd_btt-y := btt.o nd_blk-y := blk.o +nd_e820-y := e820.o + libnvdimm-y := core.o libnvdimm-y += bus.o libnvdimm-y += dimm_devs.o diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c new file mode 100644 index 000000000000..1b5743ad92db --- /dev/null +++ b/drivers/nvdimm/e820.c @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015, Christoph Hellwig. + * Copyright (c) 2015, Intel Corporation. + */ +#include +#include +#include + +static const struct attribute_group *e820_pmem_attribute_groups[] = { + &nvdimm_bus_attribute_group, + NULL, +}; + +static const struct attribute_group *e820_pmem_region_attribute_groups[] = { + &nd_region_attribute_group, + &nd_device_attribute_group, + NULL, +}; + +static int e820_pmem_remove(struct platform_device *pdev) +{ + struct nvdimm_bus *nvdimm_bus = platform_get_drvdata(pdev); + + nvdimm_bus_unregister(nvdimm_bus); + return 0; +} + +static int e820_pmem_probe(struct platform_device *pdev) +{ + static struct nvdimm_bus_descriptor nd_desc; + struct device *dev = &pdev->dev; + struct nvdimm_bus *nvdimm_bus; + struct resource *p; + + nd_desc.attr_groups = e820_pmem_attribute_groups; + nd_desc.provider_name = "e820"; + nvdimm_bus = nvdimm_bus_register(dev, &nd_desc); + if (!nvdimm_bus) + goto err; + platform_set_drvdata(pdev, nvdimm_bus); + + for (p = iomem_resource.child; p ; p = p->sibling) { + struct nd_region_desc ndr_desc; + + if (strncmp(p->name, "Persistent Memory (legacy)", 26) != 0) + continue; + + memset(&ndr_desc, 0, sizeof(ndr_desc)); + ndr_desc.res = p; + ndr_desc.attr_groups = e820_pmem_region_attribute_groups; + ndr_desc.numa_node = NUMA_NO_NODE; + if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) + goto err; + } + + return 0; + + err: + nvdimm_bus_unregister(nvdimm_bus); + dev_err(dev, "failed to register legacy persistent memory ranges\n"); + return -ENXIO; +} + +static struct platform_driver e820_pmem_driver = { + .probe = e820_pmem_probe, + .remove = e820_pmem_remove, + .driver = { + .name = "e820_pmem", + }, +}; + +static __init int e820_pmem_init(void) +{ + return platform_driver_register(&e820_pmem_driver); +} + +static __exit void e820_pmem_exit(void) +{ + platform_driver_unregister(&e820_pmem_driver); +} + +MODULE_ALIAS("platform:e820_pmem*"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); +module_init(e820_pmem_init); +module_exit(e820_pmem_exit); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index f56914c7929b..d7c136a96346 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -15,6 +15,7 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o obj-$(CONFIG_ND_BTT) += nd_btt.o obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o obj-$(CONFIG_ACPI_NFIT) += nfit.o nfit-y := $(ACPI_SRC)/nfit.o @@ -29,6 +30,9 @@ nd_btt-y += config_check.o nd_blk-y := $(NVDIMM_SRC)/blk.o nd_blk-y += config_check.o +nd_e820-y := $(NVDIMM_SRC)/e820.o +nd_e820-y += config_check.o + libnvdimm-y := $(NVDIMM_SRC)/core.o libnvdimm-y += $(NVDIMM_SRC)/bus.o libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o -- cgit v1.2.3-70-g09d2 From e2e05394e4a3420dab96f728df4531893494e15d Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 18 Aug 2015 13:55:41 -0600 Subject: pmem, dax: have direct_access use __pmem annotation Update the annotation for the kaddr pointer returned by direct_access() so that it is a __pmem pointer. This is consistent with the PMEM driver and with how this direct_access() pointer is used in the DAX code. Signed-off-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- Documentation/filesystems/Locking | 3 ++- arch/powerpc/sysdev/axonram.c | 7 ++++--- drivers/block/brd.c | 4 ++-- drivers/nvdimm/pmem.c | 4 ++-- drivers/s390/block/dcssblk.c | 10 ++++++---- fs/block_dev.c | 2 +- fs/dax.c | 37 ++++++++++++++++++++----------------- include/linux/blkdev.h | 8 ++++---- 8 files changed, 41 insertions(+), 34 deletions(-) (limited to 'drivers/nvdimm') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 6a34a0f4d37c..06d443450f21 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -397,7 +397,8 @@ prototypes: int (*release) (struct gendisk *, fmode_t); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *); + int (*direct_access) (struct block_device *, sector_t, void __pmem **, + unsigned long *); int (*media_changed) (struct gendisk *); void (*unlock_native_capacity) (struct gendisk *); int (*revalidate_disk) (struct gendisk *); diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index ee90db17b097..a2be2a66dab6 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -141,13 +141,14 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; + void *addr = (void *)(bank->ph_addr + offset); - *kaddr = (void *)(bank->ph_addr + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + *kaddr = (void __pmem *)addr; + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; return bank->size - offset; } diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 64ab4951e9d6..c96402fd1560 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -371,7 +371,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -381,7 +381,7 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, page = brd_insert_page(brd, sector); if (!page) return -ENOSPC; - *kaddr = page_address(page); + *kaddr = (void __pmem *)page_address(page); *pfn = page_to_pfn(page); /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index eb7552d939e1..f3b629779266 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,7 +92,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct pmem_device *pmem = bdev->bd_disk->private_data; size_t offset = sector << 9; @@ -101,7 +101,7 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, return -ENODEV; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ - *kaddr = (void __force *) pmem->virt_addr + offset; + *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; return pmem->size - offset; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index da212813f2d5..2c5a397b9f3e 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size); + void __pmem **kaddr, unsigned long *pfn, long size); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -879,18 +879,20 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn, long size) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; + void *addr; dev_info = bdev->bd_disk->private_data; if (!dev_info) return -ENODEV; dev_sz = dev_info->end - dev_info->start; offset = secnum * 512; - *kaddr = (void *) (dev_info->start + offset); - *pfn = virt_to_phys(*kaddr) >> PAGE_SHIFT; + addr = (void *) (dev_info->start + offset); + *pfn = virt_to_phys(addr) >> PAGE_SHIFT; + *kaddr = (void __pmem *) addr; return dev_sz - offset; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 198243717da5..2345a9870e2c 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -441,7 +441,7 @@ EXPORT_SYMBOL_GPL(bdev_write_page); * accessible at this address. */ long bdev_direct_access(struct block_device *bdev, sector_t sector, - void **addr, unsigned long *pfn, long size) + void __pmem **addr, unsigned long *pfn, long size) { long avail; const struct block_device_operations *ops = bdev->bd_disk->fops; diff --git a/fs/dax.c b/fs/dax.c index e07fecc93f80..7c634ac797b1 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -35,7 +35,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) might_sleep(); do { - void *addr; + void __pmem *addr; unsigned long pfn; long count; @@ -47,7 +47,7 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) unsigned pgsz = PAGE_SIZE - offset_in_page(addr); if (pgsz > count) pgsz = count; - clear_pmem((void __pmem *)addr, pgsz); + clear_pmem(addr, pgsz); addr += pgsz; size -= pgsz; count -= pgsz; @@ -62,7 +62,8 @@ int dax_clear_blocks(struct inode *inode, sector_t block, long size) } EXPORT_SYMBOL_GPL(dax_clear_blocks); -static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) +static long dax_get_addr(struct buffer_head *bh, void __pmem **addr, + unsigned blkbits) { unsigned long pfn; sector_t sector = bh->b_blocknr << (blkbits - 9); @@ -70,15 +71,15 @@ static long dax_get_addr(struct buffer_head *bh, void **addr, unsigned blkbits) } /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */ -static void dax_new_buf(void *addr, unsigned size, unsigned first, loff_t pos, - loff_t end) +static void dax_new_buf(void __pmem *addr, unsigned size, unsigned first, + loff_t pos, loff_t end) { loff_t final = end - pos + first; /* The final byte of the buffer */ if (first > 0) - clear_pmem((void __pmem *)addr, first); + clear_pmem(addr, first); if (final < size) - clear_pmem((void __pmem *)addr + final, size - final); + clear_pmem(addr + final, size - final); } static bool buffer_written(struct buffer_head *bh) @@ -106,7 +107,7 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, loff_t pos = start; loff_t max = start; loff_t bh_max = start; - void *addr; + void __pmem *addr; bool hole = false; bool need_wmb = false; @@ -158,11 +159,11 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter *iter, } if (iov_iter_rw(iter) == WRITE) { - len = copy_from_iter_pmem((void __pmem *)addr, - max - pos, iter); + len = copy_from_iter_pmem(addr, max - pos, iter); need_wmb = true; } else if (!hole) - len = copy_to_iter(addr, max - pos, iter); + len = copy_to_iter((void __force *)addr, max - pos, + iter); else len = iov_iter_zero(max - pos, iter); @@ -268,11 +269,13 @@ static int dax_load_hole(struct address_space *mapping, struct page *page, static int copy_user_bh(struct page *to, struct buffer_head *bh, unsigned blkbits, unsigned long vaddr) { - void *vfrom, *vto; + void __pmem *vfrom; + void *vto; + if (dax_get_addr(bh, &vfrom, blkbits) < 0) return -EIO; vto = kmap_atomic(to); - copy_user_page(vto, vfrom, vaddr, to); + copy_user_page(vto, (void __force *)vfrom, vaddr, to); kunmap_atomic(vto); return 0; } @@ -283,7 +286,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, struct address_space *mapping = inode->i_mapping; sector_t sector = bh->b_blocknr << (inode->i_blkbits - 9); unsigned long vaddr = (unsigned long)vmf->virtual_address; - void *addr; + void __pmem *addr; unsigned long pfn; pgoff_t size; int error; @@ -312,7 +315,7 @@ static int dax_insert_mapping(struct inode *inode, struct buffer_head *bh, } if (buffer_unwritten(bh) || buffer_new(bh)) { - clear_pmem((void __pmem *)addr, PAGE_SIZE); + clear_pmem(addr, PAGE_SIZE); wmb_pmem(); } @@ -548,11 +551,11 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length, if (err < 0) return err; if (buffer_written(&bh)) { - void *addr; + void __pmem *addr; err = dax_get_addr(&bh, &addr, inode->i_blkbits); if (err < 0) return err; - clear_pmem((void __pmem *)addr + offset, length); + clear_pmem(addr + offset, length); wmb_pmem(); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4068c17d0df..c401ecdff9cb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1555,8 +1555,8 @@ struct block_device_operations { int (*rw_page)(struct block_device *, sector_t, struct page *, int rw); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, - void **, unsigned long *pfn, long size); + long (*direct_access)(struct block_device *, sector_t, void __pmem **, + unsigned long *pfn, long size); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1574,8 +1574,8 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, sector_t, void **addr, - unsigned long *pfn, long size); +extern long bdev_direct_access(struct block_device *, sector_t, + void __pmem **addr, unsigned long *pfn, long size); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3-70-g09d2 From a06a7576526e10a99ea7721533e7f2df3e26baad Mon Sep 17 00:00:00 2001 From: yalin wang Date: Thu, 27 Aug 2015 19:35:48 -0400 Subject: nvdimm: change to use generic kvfree() Signed-off-by: yalin wang Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/dimm_devs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c index c05eb807d674..651b8d19d324 100644 --- a/drivers/nvdimm/dimm_devs.c +++ b/drivers/nvdimm/dimm_devs.c @@ -241,10 +241,7 @@ void nvdimm_drvdata_release(struct kref *kref) nvdimm_free_dpa(ndd, res); nvdimm_bus_unlock(dev); - if (ndd->data && is_vmalloc_addr(ndd->data)) - vfree(ndd->data); - else - kfree(ndd->data); + kvfree(ndd->data); kfree(ndd); put_device(dev); } -- cgit v1.2.3-70-g09d2 From cb389b9c0e00c30c9daf20287f7d91e2466edbb1 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 7 Aug 2015 17:41:00 -0400 Subject: dax: drop size parameter to ->direct_access() None of the implementations currently use it. The common bdev_direct_access() entry point handles all the size checks before calling ->direct_access(). Signed-off-by: Christoph Hellwig Signed-off-by: Dan Williams --- arch/powerpc/sysdev/axonram.c | 2 +- drivers/block/brd.c | 6 +----- drivers/nvdimm/pmem.c | 2 +- drivers/s390/block/dcssblk.c | 4 ++-- fs/block_dev.c | 2 +- include/linux/blkdev.h | 2 +- 6 files changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/nvdimm') diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c index a2be2a66dab6..4419c84ac15a 100644 --- a/arch/powerpc/sysdev/axonram.c +++ b/arch/powerpc/sysdev/axonram.c @@ -141,7 +141,7 @@ axon_ram_make_request(struct request_queue *queue, struct bio *bio) */ static long axon_ram_direct_access(struct block_device *device, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct axon_ram_bank *bank = device->bd_disk->private_data; loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT; diff --git a/drivers/block/brd.c b/drivers/block/brd.c index c96402fd1560..03c45c41bdfa 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -371,7 +371,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, #ifdef CONFIG_BLK_DEV_RAM_DAX static long brd_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct brd_device *brd = bdev->bd_disk->private_data; struct page *page; @@ -384,10 +384,6 @@ static long brd_direct_access(struct block_device *bdev, sector_t sector, *kaddr = (void __pmem *)page_address(page); *pfn = page_to_pfn(page); - /* - * TODO: If size > PAGE_SIZE, we could look to see if the next page in - * the file happens to be mapped to the next page of physical RAM. - */ return PAGE_SIZE; } #else diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f3b629779266..3b5b9cb758b6 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -92,7 +92,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, } static long pmem_direct_access(struct block_device *bdev, sector_t sector, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; size_t offset = sector << 9; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 2c5a397b9f3e..8c027a9e4e8a 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -29,7 +29,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static void dcssblk_make_request(struct request_queue *q, struct bio *bio); static long dcssblk_direct_access(struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn, long size); + void __pmem **kaddr, unsigned long *pfn); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -879,7 +879,7 @@ fail: static long dcssblk_direct_access (struct block_device *bdev, sector_t secnum, - void __pmem **kaddr, unsigned long *pfn, long size) + void __pmem **kaddr, unsigned long *pfn) { struct dcssblk_dev_info *dev_info; unsigned long offset, dev_sz; diff --git a/fs/block_dev.c b/fs/block_dev.c index 2345a9870e2c..3831e5691b32 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -462,7 +462,7 @@ long bdev_direct_access(struct block_device *bdev, sector_t sector, sector += get_start_sect(bdev); if (sector % (PAGE_SIZE / 512)) return -EINVAL; - avail = ops->direct_access(bdev, sector, addr, pfn, size); + avail = ops->direct_access(bdev, sector, addr, pfn); if (!avail) return -ERANGE; return min(avail, size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c401ecdff9cb..c22064f326b2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1556,7 +1556,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - unsigned long *pfn, long size); + unsigned long *pfn); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ -- cgit v1.2.3-70-g09d2 From 96601adb745186ccbcf5b078d4756f13381ec2af Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Aug 2015 18:29:38 -0400 Subject: x86, pmem: clarify that ARCH_HAS_PMEM_API implies PMEM mapped WB Given that a write-back (WB) mapping plus non-temporal stores is expected to be the most efficient way to access PMEM, update the definition of ARCH_HAS_PMEM_API to imply arch support for WB-mapped-PMEM. This is needed as a pre-requisite for adding PMEM to the direct map and mapping it with struct page. The above clarification for X86_64 means that memcpy_to_pmem() is permitted to use the non-temporal arch_memcpy_to_pmem() rather than needlessly fall back to default_memcpy_to_pmem() when the pcommit instruction is not available. When arch_memcpy_to_pmem() is not guaranteed to flush writes out of cache, i.e. on older X86_32 implementations where non-temporal stores may just dirty cache, ARCH_HAS_PMEM_API is simply disabled. The default fall back for persistent memory handling remains. Namely, map it with the WT (write-through) cache-type and hope for the best. arch_has_pmem_api() is updated to only indicate whether the arch provides the proper helpers to meet the minimum "writes are visible outside the cache hierarchy after memcpy_to_pmem() + wmb_pmem()". Code that cares whether wmb_pmem() actually flushes writes to pmem must now call arch_has_wmb_pmem() directly. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Reviewed-by: Ross Zwisler [hch: set ARCH_HAS_PMEM_API=n on x86_32] Reviewed-by: Christoph Hellwig [toshi: x86_32 compile fixes] Signed-off-by: Toshi Kani Signed-off-by: Dan Williams --- arch/x86/Kconfig | 2 +- arch/x86/include/asm/pmem.h | 9 +-------- drivers/acpi/nfit.c | 3 ++- drivers/nvdimm/pmem.c | 2 +- include/linux/pmem.h | 36 ++++++++++++++++++++++-------------- 5 files changed, 27 insertions(+), 25 deletions(-) (limited to 'drivers/nvdimm') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 03ab6122325a..ef4c6bbb3af1 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -27,7 +27,7 @@ config X86 select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_GCOV_PROFILE_ALL - select ARCH_HAS_PMEM_API + select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_SG_CHAIN select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h index bb026c5adf8a..d8ce3ec816ab 100644 --- a/arch/x86/include/asm/pmem.h +++ b/arch/x86/include/asm/pmem.h @@ -18,8 +18,6 @@ #include #include -#define ARCH_MEMREMAP_PMEM MEMREMAP_WB - #ifdef CONFIG_ARCH_HAS_PMEM_API /** * arch_memcpy_to_pmem - copy data to persistent memory @@ -143,18 +141,13 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) __arch_wb_cache_pmem(vaddr, size); } -static inline bool arch_has_wmb_pmem(void) +static inline bool __arch_has_wmb_pmem(void) { -#ifdef CONFIG_X86_64 /* * We require that wmb() be an 'sfence', that is only guaranteed on * 64-bit builds */ return static_cpu_has(X86_FEATURE_PCOMMIT); -#else - return false; -#endif } #endif /* CONFIG_ARCH_HAS_PMEM_API */ - #endif /* __ASM_X86_PMEM_H__ */ diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c index 56fff0141636..f61e69fa2ad1 100644 --- a/drivers/acpi/nfit.c +++ b/drivers/acpi/nfit.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "nfit.h" /* @@ -1371,7 +1372,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus, return -ENOMEM; } - if (!arch_has_pmem_api() && !nfit_blk->nvdimm_flush) + if (!arch_has_wmb_pmem() && !nfit_blk->nvdimm_flush) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (mmio->line_size == 0) diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 3b5b9cb758b6..20bf122328da 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -125,7 +125,7 @@ static struct pmem_device *pmem_alloc(struct device *dev, pmem->phys_addr = res->start; pmem->size = resource_size(res); - if (!arch_has_pmem_api()) + if (!arch_has_wmb_pmem()) dev_warn(dev, "unable to guarantee persistence of writes\n"); if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size, diff --git a/include/linux/pmem.h b/include/linux/pmem.h index a9d84bf335ee..85f810b33917 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -17,16 +17,23 @@ #include #ifdef CONFIG_ARCH_HAS_PMEM_API +#define ARCH_MEMREMAP_PMEM MEMREMAP_WB #include #else -static inline void arch_wmb_pmem(void) +#define ARCH_MEMREMAP_PMEM MEMREMAP_WT +/* + * These are simply here to enable compilation, all call sites gate + * calling these symbols with arch_has_pmem_api() and redirect to the + * implementation in asm/pmem.h. + */ +static inline bool __arch_has_wmb_pmem(void) { - BUG(); + return false; } -static inline bool arch_has_wmb_pmem(void) +static inline void arch_wmb_pmem(void) { - return false; + BUG(); } static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -53,7 +60,6 @@ static inline void arch_clear_pmem(void __pmem *addr, size_t size) * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), * arch_copy_from_iter_pmem(), arch_clear_pmem() and arch_has_wmb_pmem(). */ - static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { memcpy(dst, (void __force const *) src, size); @@ -64,8 +70,13 @@ static inline void memunmap_pmem(struct device *dev, void __pmem *addr) devm_memunmap(dev, (void __force *) addr); } +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + /** - * arch_has_pmem_api - true if wmb_pmem() ensures durability + * arch_has_wmb_pmem - true if wmb_pmem() ensures durability * * For a given cpu implementation within an architecture it is possible * that wmb_pmem() resolves to a nop. In the case this returns @@ -73,9 +84,9 @@ static inline void memunmap_pmem(struct device *dev, void __pmem *addr) * fall back to a different data consistency model, or otherwise notify * the user. */ -static inline bool arch_has_pmem_api(void) +static inline bool arch_has_wmb_pmem(void) { - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API) && arch_has_wmb_pmem(); + return arch_has_pmem_api() && __arch_has_wmb_pmem(); } /* @@ -120,13 +131,8 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) static inline void __pmem *memremap_pmem(struct device *dev, resource_size_t offset, unsigned long size) { -#ifdef ARCH_MEMREMAP_PMEM return (void __pmem *) devm_memremap(dev, offset, size, ARCH_MEMREMAP_PMEM); -#else - return (void __pmem *) devm_memremap(dev, offset, size, - MEMREMAP_WT); -#endif } /** @@ -158,8 +164,10 @@ static inline void memcpy_to_pmem(void __pmem *dst, const void *src, size_t n) */ static inline void wmb_pmem(void) { - if (arch_has_pmem_api()) + if (arch_has_wmb_pmem()) arch_wmb_pmem(); + else + wmb(); } /** -- cgit v1.2.3-70-g09d2 From e1455744b27c9e6115c3508a7b2902157c2c4347 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 30 Jul 2015 17:57:47 -0400 Subject: libnvdimm, pfn: 'struct page' provider infrastructure Implement the base infrastructure for libnvdimm PFN devices. Similar to BTT devices they take a namespace as a backing device and layer functionality on top. In this case the functionality is reserving space for an array of 'struct page' entries to be handed out through pfn_to_page(). For now this is just the basic libnvdimm-device-model for configuring the base PFN device. As the namespace claiming mechanism for PFN devices is mostly identical to BTT devices drivers/nvdimm/claim.c is created to house the common bits. Cc: Ross Zwisler Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 22 +++ drivers/nvdimm/Makefile | 2 + drivers/nvdimm/btt.c | 6 +- drivers/nvdimm/btt_devs.c | 172 +------------------- drivers/nvdimm/claim.c | 201 ++++++++++++++++++++++++ drivers/nvdimm/namespace_devs.c | 34 +++- drivers/nvdimm/nd-core.h | 9 ++ drivers/nvdimm/nd.h | 56 ++++++- drivers/nvdimm/pfn.h | 35 +++++ drivers/nvdimm/pfn_devs.c | 336 ++++++++++++++++++++++++++++++++++++++++ drivers/nvdimm/region.c | 2 + drivers/nvdimm/region_devs.c | 19 +++ tools/testing/nvdimm/Kbuild | 2 + 13 files changed, 719 insertions(+), 177 deletions(-) create mode 100644 drivers/nvdimm/claim.c create mode 100644 drivers/nvdimm/pfn.h create mode 100644 drivers/nvdimm/pfn_devs.c (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index 72226acb5c0f..ace25b53b755 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -21,6 +21,7 @@ config BLK_DEV_PMEM default LIBNVDIMM depends on HAS_IOMEM select ND_BTT if BTT + select ND_PFN if NVDIMM_PFN help Memory ranges for PMEM are described by either an NFIT (NVDIMM Firmware Interface Table, see CONFIG_NFIT_ACPI), a @@ -47,12 +48,16 @@ config ND_BLK (CONFIG_ACPI_NFIT), or otherwise exposes BLK-mode capabilities. +config ND_CLAIM + bool + config ND_BTT tristate config BTT bool "BTT: Block Translation Table (atomic sector updates)" default y if LIBNVDIMM + select ND_CLAIM help The Block Translation Table (BTT) provides atomic sector update semantics for persistent memory devices, so that @@ -65,4 +70,21 @@ config BTT Select Y if unsure +config ND_PFN + tristate + +config NVDIMM_PFN + bool "PFN: Map persistent (device) memory" + default LIBNVDIMM + select ND_CLAIM + help + Map persistent memory, i.e. advertise it to the memory + management sub-system. By default persistent memory does + not support direct I/O, RDMA, or any other usage that + requires a 'struct page' to mediate an I/O request. This + driver allocates and initializes the infrastructure needed + to support those use cases. + + Select Y if unsure + endif diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile index 9bf15db52dee..ea84d3c4e8e5 100644 --- a/drivers/nvdimm/Makefile +++ b/drivers/nvdimm/Makefile @@ -20,4 +20,6 @@ libnvdimm-y += region_devs.o libnvdimm-y += region.o libnvdimm-y += namespace_devs.o libnvdimm-y += label.o +libnvdimm-$(CONFIG_ND_CLAIM) += claim.o libnvdimm-$(CONFIG_BTT) += btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 19588291550b..028d2d137bc5 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -731,6 +731,7 @@ static int create_arenas(struct btt *btt) static int btt_arena_write_layout(struct arena_info *arena) { int ret; + u64 sum; struct btt_sb *super; struct nd_btt *nd_btt = arena->nd_btt; const u8 *parent_uuid = nd_dev_to_uuid(&nd_btt->ndns->dev); @@ -770,7 +771,8 @@ static int btt_arena_write_layout(struct arena_info *arena) super->info2off = cpu_to_le64(arena->info2off - arena->infooff); super->flags = 0; - super->checksum = cpu_to_le64(nd_btt_sb_checksum(super)); + sum = nd_sb_checksum((struct nd_gen_sb *) super); + super->checksum = cpu_to_le64(sum); ret = btt_info_write(arena, super); @@ -1422,8 +1424,6 @@ static int __init nd_btt_init(void) { int rc; - BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); - btt_major = register_blkdev(0, "btt"); if (btt_major < 0) return btt_major; diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c index 242ae1c550ad..59ad54a63d9f 100644 --- a/drivers/nvdimm/btt_devs.c +++ b/drivers/nvdimm/btt_devs.c @@ -21,63 +21,13 @@ #include "btt.h" #include "nd.h" -static void __nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || ndns->claim != &nd_btt->dev, - "%s: invalid claim\n", __func__); - ndns->claim = NULL; - nd_btt->ndns = NULL; - put_device(&ndns->dev); -} - -static void nd_btt_detach_ndns(struct nd_btt *nd_btt) -{ - struct nd_namespace_common *ndns = nd_btt->ndns; - - if (!ndns) - return; - get_device(&ndns->dev); - device_lock(&ndns->dev); - __nd_btt_detach_ndns(nd_btt); - device_unlock(&ndns->dev); - put_device(&ndns->dev); -} - -static bool __nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - if (ndns->claim) - return false; - dev_WARN_ONCE(&nd_btt->dev, !mutex_is_locked(&ndns->dev.mutex) - || nd_btt->ndns, - "%s: invalid claim\n", __func__); - ndns->claim = &nd_btt->dev; - nd_btt->ndns = ndns; - get_device(&ndns->dev); - return true; -} - -static bool nd_btt_attach_ndns(struct nd_btt *nd_btt, - struct nd_namespace_common *ndns) -{ - bool claimed; - - device_lock(&ndns->dev); - claimed = __nd_btt_attach_ndns(nd_btt, ndns); - device_unlock(&ndns->dev); - return claimed; -} - static void nd_btt_release(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); struct nd_btt *nd_btt = to_nd_btt(dev); dev_dbg(dev, "%s\n", __func__); - nd_btt_detach_ndns(nd_btt); + nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns); ida_simple_remove(&nd_region->btt_ida, nd_btt->id); kfree(nd_btt->uuid); kfree(nd_btt); @@ -172,104 +122,15 @@ static ssize_t namespace_show(struct device *dev, return rc; } -static int namespace_match(struct device *dev, void *data) -{ - char *name = data; - - return strcmp(name, dev_name(dev)) == 0; -} - -static bool is_nd_btt_idle(struct device *dev) -{ - struct nd_region *nd_region = to_nd_region(dev->parent); - struct nd_btt *nd_btt = to_nd_btt(dev); - - if (nd_region->btt_seed == dev || nd_btt->ndns || dev->driver) - return false; - return true; -} - -static ssize_t __namespace_store(struct device *dev, - struct device_attribute *attr, const char *buf, size_t len) -{ - struct nd_btt *nd_btt = to_nd_btt(dev); - struct nd_namespace_common *ndns; - struct device *found; - char *name; - - if (dev->driver) { - dev_dbg(dev, "%s: -EBUSY\n", __func__); - return -EBUSY; - } - - name = kstrndup(buf, len, GFP_KERNEL); - if (!name) - return -ENOMEM; - strim(name); - - if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) - /* pass */; - else { - len = -EINVAL; - goto out; - } - - ndns = nd_btt->ndns; - if (strcmp(name, "") == 0) { - /* detach the namespace and destroy / reset the btt device */ - nd_btt_detach_ndns(nd_btt); - if (is_nd_btt_idle(dev)) - nd_device_unregister(dev, ND_ASYNC); - else { - nd_btt->lbasize = 0; - kfree(nd_btt->uuid); - nd_btt->uuid = NULL; - } - goto out; - } else if (ndns) { - dev_dbg(dev, "namespace already set to: %s\n", - dev_name(&ndns->dev)); - len = -EBUSY; - goto out; - } - - found = device_find_child(dev->parent, name, namespace_match); - if (!found) { - dev_dbg(dev, "'%s' not found under %s\n", name, - dev_name(dev->parent)); - len = -ENODEV; - goto out; - } - - ndns = to_ndns(found); - if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { - dev_dbg(dev, "%s too small to host btt\n", name); - len = -ENXIO; - goto out_attach; - } - - WARN_ON_ONCE(!is_nvdimm_bus_locked(&nd_btt->dev)); - if (!nd_btt_attach_ndns(nd_btt, ndns)) { - dev_dbg(dev, "%s already claimed\n", - dev_name(&ndns->dev)); - len = -EBUSY; - } - - out_attach: - put_device(&ndns->dev); /* from device_find_child */ - out: - kfree(name); - return len; -} - static ssize_t namespace_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t len) { + struct nd_btt *nd_btt = to_nd_btt(dev); ssize_t rc; nvdimm_bus_lock(dev); device_lock(dev); - rc = __namespace_store(dev, attr, buf, len); + rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len); dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, rc, buf, buf[len - 1] == '\n' ? "" : "\n"); device_unlock(dev); @@ -324,7 +185,7 @@ static struct device *__nd_btt_create(struct nd_region *nd_region, dev->type = &nd_btt_device_type; dev->groups = nd_btt_attribute_groups; device_initialize(&nd_btt->dev); - if (ndns && !__nd_btt_attach_ndns(nd_btt, ndns)) { + if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) { dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", __func__, dev_name(ndns->claim)); put_device(dev); @@ -375,7 +236,7 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) checksum = le64_to_cpu(super->checksum); super->checksum = 0; - if (checksum != nd_btt_sb_checksum(super)) + if (checksum != nd_sb_checksum((struct nd_gen_sb *) super)) return false; super->checksum = cpu_to_le64(checksum); @@ -387,25 +248,6 @@ bool nd_btt_arena_is_valid(struct nd_btt *nd_btt, struct btt_sb *super) } EXPORT_SYMBOL(nd_btt_arena_is_valid); -/* - * nd_btt_sb_checksum: compute checksum for btt info block - * - * Returns a fletcher64 checksum of everything in the given info block - * except the last field (since that's where the checksum lives). - */ -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb) -{ - u64 sum; - __le64 sum_save; - - sum_save = btt_sb->checksum; - btt_sb->checksum = 0; - sum = nd_fletcher64(btt_sb, sizeof(*btt_sb), 1); - btt_sb->checksum = sum_save; - return sum; -} -EXPORT_SYMBOL(nd_btt_sb_checksum); - static int __nd_btt_probe(struct nd_btt *nd_btt, struct nd_namespace_common *ndns, struct btt_sb *btt_sb) { @@ -453,7 +295,9 @@ int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata) dev_dbg(&ndns->dev, "%s: btt: %s\n", __func__, rc == 0 ? dev_name(dev) : ""); if (rc < 0) { - __nd_btt_detach_ndns(to_nd_btt(dev)); + struct nd_btt *nd_btt = to_nd_btt(dev); + + __nd_detach_ndns(dev, &nd_btt->ndns); put_device(dev); } diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c new file mode 100644 index 000000000000..e8f03b0e95e4 --- /dev/null +++ b/drivers/nvdimm/claim.c @@ -0,0 +1,201 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include "nd-core.h" +#include "pfn.h" +#include "btt.h" +#include "nd.h" + +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + dev_WARN_ONCE(dev, !mutex_is_locked(&ndns->dev.mutex) + || ndns->claim != dev, + "%s: invalid claim\n", __func__); + ndns->claim = NULL; + *_ndns = NULL; + put_device(&ndns->dev); +} + +void nd_detach_ndns(struct device *dev, + struct nd_namespace_common **_ndns) +{ + struct nd_namespace_common *ndns = *_ndns; + + if (!ndns) + return; + get_device(&ndns->dev); + device_lock(&ndns->dev); + __nd_detach_ndns(dev, _ndns); + device_unlock(&ndns->dev); + put_device(&ndns->dev); +} + +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + if (attach->claim) + return false; + dev_WARN_ONCE(dev, !mutex_is_locked(&attach->dev.mutex) + || *_ndns, + "%s: invalid claim\n", __func__); + attach->claim = dev; + *_ndns = attach; + get_device(&attach->dev); + return true; +} + +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns) +{ + bool claimed; + + device_lock(&attach->dev); + claimed = __nd_attach_ndns(dev, attach, _ndns); + device_unlock(&attach->dev); + return claimed; +} + +static int namespace_match(struct device *dev, void *data) +{ + char *name = data; + + return strcmp(name, dev_name(dev)) == 0; +} + +static bool is_idle(struct device *dev, struct nd_namespace_common *ndns) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct device *seed = NULL; + + if (is_nd_btt(dev)) + seed = nd_region->btt_seed; + else if (is_nd_pfn(dev)) + seed = nd_region->pfn_seed; + + if (seed == dev || ndns || dev->driver) + return false; + return true; +} + +static void nd_detach_and_reset(struct device *dev, + struct nd_namespace_common **_ndns) +{ + /* detach the namespace and destroy / reset the device */ + nd_detach_ndns(dev, _ndns); + if (is_idle(dev, *_ndns)) { + nd_device_unregister(dev, ND_ASYNC); + } else if (is_nd_btt(dev)) { + struct nd_btt *nd_btt = to_nd_btt(dev); + + nd_btt->lbasize = 0; + kfree(nd_btt->uuid); + nd_btt->uuid = NULL; + } else if (is_nd_pfn(dev)) { + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + kfree(nd_pfn->uuid); + nd_pfn->uuid = NULL; + nd_pfn->mode = PFN_MODE_NONE; + } +} + +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len) +{ + struct nd_namespace_common *ndns; + struct device *found; + char *name; + + if (dev->driver) { + dev_dbg(dev, "%s: -EBUSY\n", __func__); + return -EBUSY; + } + + name = kstrndup(buf, len, GFP_KERNEL); + if (!name) + return -ENOMEM; + strim(name); + + if (strncmp(name, "namespace", 9) == 0 || strcmp(name, "") == 0) + /* pass */; + else { + len = -EINVAL; + goto out; + } + + ndns = *_ndns; + if (strcmp(name, "") == 0) { + nd_detach_and_reset(dev, _ndns); + goto out; + } else if (ndns) { + dev_dbg(dev, "namespace already set to: %s\n", + dev_name(&ndns->dev)); + len = -EBUSY; + goto out; + } + + found = device_find_child(dev->parent, name, namespace_match); + if (!found) { + dev_dbg(dev, "'%s' not found under %s\n", name, + dev_name(dev->parent)); + len = -ENODEV; + goto out; + } + + ndns = to_ndns(found); + if (__nvdimm_namespace_capacity(ndns) < SZ_16M) { + dev_dbg(dev, "%s too small to host\n", name); + len = -ENXIO; + goto out_attach; + } + + WARN_ON_ONCE(!is_nvdimm_bus_locked(dev)); + if (!nd_attach_ndns(dev, ndns, _ndns)) { + dev_dbg(dev, "%s already claimed\n", + dev_name(&ndns->dev)); + len = -EBUSY; + } + + out_attach: + put_device(&ndns->dev); /* from device_find_child */ + out: + kfree(name); + return len; +} + +/* + * nd_sb_checksum: compute checksum for a generic info block + * + * Returns a fletcher64 checksum of everything in the given info block + * except the last field (since that's where the checksum lives). + */ +u64 nd_sb_checksum(struct nd_gen_sb *nd_gen_sb) +{ + u64 sum; + __le64 sum_save; + + BUILD_BUG_ON(sizeof(struct btt_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_pfn_sb) != SZ_4K); + BUILD_BUG_ON(sizeof(struct nd_gen_sb) != SZ_4K); + + sum_save = nd_gen_sb->checksum; + nd_gen_sb->checksum = 0; + sum = nd_fletcher64(nd_gen_sb, sizeof(*nd_gen_sb), 1); + nd_gen_sb->checksum = sum_save; + return sum; +} +EXPORT_SYMBOL(nd_sb_checksum); diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index b18ffea9d85b..9303ca29be9b 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -82,8 +82,16 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, struct nd_region *nd_region = to_nd_region(ndns->dev.parent); const char *suffix = ""; - if (ndns->claim && is_nd_btt(ndns->claim)) - suffix = "s"; + if (ndns->claim) { + if (is_nd_btt(ndns->claim)) + suffix = "s"; + else if (is_nd_pfn(ndns->claim)) + suffix = "m"; + else + dev_WARN_ONCE(&ndns->dev, 1, + "unknown claim type by %s\n", + dev_name(ndns->claim)); + } if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) sprintf(name, "pmem%d%s", nd_region->id, suffix); @@ -1255,12 +1263,22 @@ static const struct attribute_group *nd_namespace_attribute_groups[] = { struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) { struct nd_btt *nd_btt = is_nd_btt(dev) ? to_nd_btt(dev) : NULL; + struct nd_pfn *nd_pfn = is_nd_pfn(dev) ? to_nd_pfn(dev) : NULL; struct nd_namespace_common *ndns; resource_size_t size; - if (nd_btt) { - ndns = nd_btt->ndns; - if (!ndns) + if (nd_btt || nd_pfn) { + struct device *host = NULL; + + if (nd_btt) { + host = &nd_btt->dev; + ndns = nd_btt->ndns; + } else if (nd_pfn) { + host = &nd_pfn->dev; + ndns = nd_pfn->ndns; + } + + if (!ndns || !host) return ERR_PTR(-ENODEV); /* @@ -1271,12 +1289,12 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev) device_unlock(&ndns->dev); if (ndns->dev.driver) { dev_dbg(&ndns->dev, "is active, can't bind %s\n", - dev_name(&nd_btt->dev)); + dev_name(host)); return ERR_PTR(-EBUSY); } - if (dev_WARN_ONCE(&ndns->dev, ndns->claim != &nd_btt->dev, + if (dev_WARN_ONCE(&ndns->dev, ndns->claim != host, "host (%s) vs claim (%s) mismatch\n", - dev_name(&nd_btt->dev), + dev_name(host), dev_name(ndns->claim))) return ERR_PTR(-ENXIO); } else { diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h index e1970c71ad1c..159aed532042 100644 --- a/drivers/nvdimm/nd-core.h +++ b/drivers/nvdimm/nd-core.h @@ -80,4 +80,13 @@ struct resource *nsblk_add_resource(struct nd_region *nd_region, int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd); void get_ndd(struct nvdimm_drvdata *ndd); resource_size_t __nvdimm_namespace_capacity(struct nd_namespace_common *ndns); +void nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +void __nd_detach_ndns(struct device *dev, struct nd_namespace_common **_ndns); +bool nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +bool __nd_attach_ndns(struct device *dev, struct nd_namespace_common *attach, + struct nd_namespace_common **_ndns); +ssize_t nd_namespace_store(struct device *dev, + struct nd_namespace_common **_ndns, const char *buf, + size_t len); #endif /* __ND_CORE_H__ */ diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index f9615824947b..8da2be1cecb8 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -29,6 +29,13 @@ enum { ND_MAX_LANES = 256, SECTOR_SHIFT = 9, INT_LBASIZE_ALIGNMENT = 64, +#if IS_ENABLED(CONFIG_NVDIMM_PFN) + ND_PFN_ALIGN = PAGES_PER_SECTION * PAGE_SIZE, + ND_PFN_MASK = ND_PFN_ALIGN - 1, +#else + ND_PFN_ALIGN = 0, + ND_PFN_MASK = 0, +#endif }; struct nvdimm_drvdata { @@ -92,8 +99,10 @@ struct nd_region { struct device dev; struct ida ns_ida; struct ida btt_ida; + struct ida pfn_ida; struct device *ns_seed; struct device *btt_seed; + struct device *pfn_seed; u16 ndr_mappings; u64 ndr_size; u64 ndr_start; @@ -133,6 +142,22 @@ struct nd_btt { int id; }; +enum nd_pfn_mode { + PFN_MODE_NONE, + PFN_MODE_RAM, + PFN_MODE_PMEM, +}; + +struct nd_pfn { + int id; + u8 *uuid; + struct device dev; + unsigned long npfns; + enum nd_pfn_mode mode; + struct nd_pfn_sb *pfn_sb; + struct nd_namespace_common *ndns; +}; + enum nd_async_mode { ND_SYNC, ND_ASYNC, @@ -159,8 +184,13 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd); int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset, void *buf, size_t len); struct nd_btt *to_nd_btt(struct device *dev); -struct btt_sb; -u64 nd_btt_sb_checksum(struct btt_sb *btt_sb); + +struct nd_gen_sb { + char reserved[SZ_4K - 8]; + __le64 checksum; +}; + +u64 nd_sb_checksum(struct nd_gen_sb *sb); #if IS_ENABLED(CONFIG_BTT) int nd_btt_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_btt(struct device *dev); @@ -180,8 +210,30 @@ static inline struct device *nd_btt_create(struct nd_region *nd_region) { return NULL; } +#endif +struct nd_pfn *to_nd_pfn(struct device *dev); +#if IS_ENABLED(CONFIG_NVDIMM_PFN) +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); +bool is_nd_pfn(struct device *dev); +struct device *nd_pfn_create(struct nd_region *nd_region); +#else +static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + return -ENODEV; +} + +static inline bool is_nd_pfn(struct device *dev) +{ + return false; +} + +static inline struct device *nd_pfn_create(struct nd_region *nd_region) +{ + return NULL; +} #endif + struct nd_region *to_nd_region(struct device *dev); int nd_region_to_nstype(struct nd_region *nd_region); int nd_region_register_namespaces(struct nd_region *nd_region, int *err); diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h new file mode 100644 index 000000000000..cc243754acef --- /dev/null +++ b/drivers/nvdimm/pfn.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __NVDIMM_PFN_H +#define __NVDIMM_PFN_H + +#include + +#define PFN_SIG_LEN 16 +#define PFN_SIG "NVDIMM_PFN_INFO\0" + +struct nd_pfn_sb { + u8 signature[PFN_SIG_LEN]; + u8 uuid[16]; + u8 parent_uuid[16]; + __le32 flags; + __le16 version_major; + __le16 version_minor; + __le64 dataoff; + __le64 npfns; + __le32 mode; + u8 padding[4012]; + __le64 checksum; +}; +#endif /* __NVDIMM_PFN_H */ diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c new file mode 100644 index 000000000000..f708d63709a5 --- /dev/null +++ b/drivers/nvdimm/pfn_devs.c @@ -0,0 +1,336 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include +#include +#include "nd-core.h" +#include "pfn.h" +#include "nd.h" + +static void nd_pfn_release(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + dev_dbg(dev, "%s\n", __func__); + nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); + ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); + kfree(nd_pfn->uuid); + kfree(nd_pfn); +} + +static struct device_type nd_pfn_device_type = { + .name = "nd_pfn", + .release = nd_pfn_release, +}; + +bool is_nd_pfn(struct device *dev) +{ + return dev ? dev->type == &nd_pfn_device_type : false; +} +EXPORT_SYMBOL(is_nd_pfn); + +struct nd_pfn *to_nd_pfn(struct device *dev) +{ + struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); + + WARN_ON(!is_nd_pfn(dev)); + return nd_pfn; +} +EXPORT_SYMBOL(to_nd_pfn); + +static ssize_t mode_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + switch (nd_pfn->mode) { + case PFN_MODE_RAM: + return sprintf(buf, "ram\n"); + case PFN_MODE_PMEM: + return sprintf(buf, "pmem\n"); + default: + return sprintf(buf, "none\n"); + } +} + +static ssize_t mode_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc = 0; + + device_lock(dev); + nvdimm_bus_lock(dev); + if (dev->driver) + rc = -EBUSY; + else { + size_t n = len - 1; + + if (strncmp(buf, "pmem\n", n) == 0 + || strncmp(buf, "pmem", n) == 0) { + /* TODO: allocate from PMEM support */ + rc = -ENOTTY; + } else if (strncmp(buf, "ram\n", n) == 0 + || strncmp(buf, "ram", n) == 0) + nd_pfn->mode = PFN_MODE_RAM; + else if (strncmp(buf, "none\n", n) == 0 + || strncmp(buf, "none", n) == 0) + nd_pfn->mode = PFN_MODE_NONE; + else + rc = -EINVAL; + } + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + nvdimm_bus_unlock(dev); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(mode); + +static ssize_t uuid_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + + if (nd_pfn->uuid) + return sprintf(buf, "%pUb\n", nd_pfn->uuid); + return sprintf(buf, "\n"); +} + +static ssize_t uuid_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + device_lock(dev); + rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + + return rc ? rc : len; +} +static DEVICE_ATTR_RW(uuid); + +static ssize_t namespace_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + rc = sprintf(buf, "%s\n", nd_pfn->ndns + ? dev_name(&nd_pfn->ndns->dev) : ""); + nvdimm_bus_unlock(dev); + return rc; +} + +static ssize_t namespace_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + device_lock(dev); + rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); + dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__, + rc, buf, buf[len - 1] == '\n' ? "" : "\n"); + device_unlock(dev); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RW(namespace); + +static struct attribute *nd_pfn_attributes[] = { + &dev_attr_mode.attr, + &dev_attr_namespace.attr, + &dev_attr_uuid.attr, + NULL, +}; + +static struct attribute_group nd_pfn_attribute_group = { + .attrs = nd_pfn_attributes, +}; + +static const struct attribute_group *nd_pfn_attribute_groups[] = { + &nd_pfn_attribute_group, + &nd_device_attribute_group, + &nd_numa_attribute_group, + NULL, +}; + +static struct device *__nd_pfn_create(struct nd_region *nd_region, + u8 *uuid, enum nd_pfn_mode mode, + struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn; + struct device *dev; + + /* we can only create pages for contiguous ranged of pmem */ + if (!is_nd_pmem(&nd_region->dev)) + return NULL; + + nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); + if (!nd_pfn) + return NULL; + + nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); + if (nd_pfn->id < 0) { + kfree(nd_pfn); + return NULL; + } + + nd_pfn->mode = mode; + if (uuid) + uuid = kmemdup(uuid, 16, GFP_KERNEL); + nd_pfn->uuid = uuid; + dev = &nd_pfn->dev; + dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); + dev->parent = &nd_region->dev; + dev->type = &nd_pfn_device_type; + dev->groups = nd_pfn_attribute_groups; + device_initialize(&nd_pfn->dev); + if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { + dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n", + __func__, dev_name(ndns->claim)); + put_device(dev); + return NULL; + } + return dev; +} + +struct device *nd_pfn_create(struct nd_region *nd_region) +{ + struct device *dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, + NULL); + + if (dev) + __nd_device_register(dev); + return dev; +} + +static int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; + struct nd_namespace_io *nsio; + u64 checksum, offset; + + if (!pfn_sb || !ndns) + return -ENODEV; + + if (!is_nd_pmem(nd_pfn->dev.parent)) + return -ENODEV; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN) + return -ENODEV; + + if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb))) + return -ENXIO; + + if (memcmp(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN) != 0) + return -ENODEV; + + checksum = le64_to_cpu(pfn_sb->checksum); + pfn_sb->checksum = 0; + if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) + return -ENODEV; + pfn_sb->checksum = cpu_to_le64(checksum); + + switch (le32_to_cpu(pfn_sb->mode)) { + case PFN_MODE_RAM: + break; + case PFN_MODE_PMEM: + /* TODO: allocate from PMEM support */ + return -ENOTTY; + default: + return -ENXIO; + } + + if (!nd_pfn->uuid) { + /* from probe we allocate */ + nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); + if (!nd_pfn->uuid) + return -ENOMEM; + } else { + /* from init we validate */ + if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) + return -EINVAL; + } + + /* + * These warnings are verbose because they can only trigger in + * the case where the physical address alignment of the + * namespace has changed since the pfn superblock was + * established. + */ + offset = le64_to_cpu(pfn_sb->dataoff); + nsio = to_nd_namespace_io(&ndns->dev); + if ((nsio->res.start + offset) & (ND_PFN_ALIGN - 1)) { + dev_err(&nd_pfn->dev, + "init failed: %s with offset %#llx not section aligned\n", + dev_name(&ndns->dev), offset); + return -EBUSY; + } else if (offset >= resource_size(&nsio->res)) { + dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", + dev_name(&ndns->dev)); + return -EBUSY; + } + + return 0; +} + +int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) +{ + int rc; + struct device *dev; + struct nd_pfn *nd_pfn; + struct nd_pfn_sb *pfn_sb; + struct nd_region *nd_region = to_nd_region(ndns->dev.parent); + + if (ndns->force_raw) + return -ENODEV; + + nvdimm_bus_lock(&ndns->dev); + dev = __nd_pfn_create(nd_region, NULL, PFN_MODE_NONE, ndns); + nvdimm_bus_unlock(&ndns->dev); + if (!dev) + return -ENOMEM; + dev_set_drvdata(dev, drvdata); + pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + nd_pfn = to_nd_pfn(dev); + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + dev_dbg(&ndns->dev, "%s: pfn: %s\n", __func__, + rc == 0 ? dev_name(dev) : ""); + if (rc < 0) { + __nd_detach_ndns(dev, &nd_pfn->ndns); + put_device(dev); + } else + __nd_device_register(&nd_pfn->dev); + + return rc; +} +EXPORT_SYMBOL(nd_pfn_probe); diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c index f28f78ccff19..7da63eac78ee 100644 --- a/drivers/nvdimm/region.c +++ b/drivers/nvdimm/region.c @@ -53,6 +53,7 @@ static int nd_region_probe(struct device *dev) return -ENODEV; nd_region->btt_seed = nd_btt_create(nd_region); + nd_region->pfn_seed = nd_pfn_create(nd_region); if (err == 0) return 0; @@ -84,6 +85,7 @@ static int nd_region_remove(struct device *dev) nvdimm_bus_lock(dev); nd_region->ns_seed = NULL; nd_region->btt_seed = NULL; + nd_region->pfn_seed = NULL; dev_set_drvdata(dev, NULL); nvdimm_bus_unlock(dev); diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index 7384455792bf..da4338154ad2 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -345,6 +345,23 @@ static ssize_t btt_seed_show(struct device *dev, } static DEVICE_ATTR_RO(btt_seed); +static ssize_t pfn_seed_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct nd_region *nd_region = to_nd_region(dev); + ssize_t rc; + + nvdimm_bus_lock(dev); + if (nd_region->pfn_seed) + rc = sprintf(buf, "%s\n", dev_name(nd_region->pfn_seed)); + else + rc = sprintf(buf, "\n"); + nvdimm_bus_unlock(dev); + + return rc; +} +static DEVICE_ATTR_RO(pfn_seed); + static ssize_t read_only_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -373,6 +390,7 @@ static struct attribute *nd_region_attributes[] = { &dev_attr_nstype.attr, &dev_attr_mappings.attr, &dev_attr_btt_seed.attr, + &dev_attr_pfn_seed.attr, &dev_attr_read_only.attr, &dev_attr_set_cookie.attr, &dev_attr_available_size.attr, @@ -744,6 +762,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->numa_node = ndr_desc->numa_node; ida_init(&nd_region->ns_ida); ida_init(&nd_region->btt_ida); + ida_init(&nd_region->pfn_ida); dev = &nd_region->dev; dev_set_name(dev, "region%d", nd_region->id); dev->parent = &nvdimm_bus->dev; diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 98f2881ba6a2..99e70f0729be 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -43,7 +43,9 @@ libnvdimm-y += $(NVDIMM_SRC)/region_devs.o libnvdimm-y += $(NVDIMM_SRC)/region.o libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o libnvdimm-y += $(NVDIMM_SRC)/label.o +libnvdimm-$(CONFIG_ND_CLAIM) += $(NVDIMM_SRC)/claim.o libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o +libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o libnvdimm-y += config_check.o obj-m += test/ -- cgit v1.2.3-70-g09d2 From 32ab0a3f51701cb37ab960635254d5f84ec3de0a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 1 Aug 2015 02:16:37 -0400 Subject: libnvdimm, pmem: 'struct page' for pmem Enable the pmem driver to handle PFN device instances. Attaching a pmem namespace to a pfn device triggers the driver to allocate and initialize struct page entries for pmem. Memory capacity for this allocation comes exclusively from RAM for now which is suitable for low PMEM to RAM ratios. This mechanism will be expanded later for setting an "allocate from PMEM" policy. Cc: Boaz Harrosh Cc: Ross Zwisler Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/Kconfig | 1 + drivers/nvdimm/nd.h | 6 ++ drivers/nvdimm/pfn_devs.c | 9 +- drivers/nvdimm/pmem.c | 203 +++++++++++++++++++++++++++++++++++--- tools/testing/nvdimm/Kbuild | 1 + tools/testing/nvdimm/test/iomap.c | 13 +++ 6 files changed, 216 insertions(+), 17 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig index ace25b53b755..53c11621d5b1 100644 --- a/drivers/nvdimm/Kconfig +++ b/drivers/nvdimm/Kconfig @@ -76,6 +76,7 @@ config ND_PFN config NVDIMM_PFN bool "PFN: Map persistent (device) memory" default LIBNVDIMM + depends on ZONE_DEVICE select ND_CLAIM help Map persistent memory, i.e. advertise it to the memory diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 8da2be1cecb8..83e5d0935012 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -217,6 +217,7 @@ struct nd_pfn *to_nd_pfn(struct device *dev); int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata); bool is_nd_pfn(struct device *dev); struct device *nd_pfn_create(struct nd_region *nd_region); +int nd_pfn_validate(struct nd_pfn *nd_pfn); #else static inline int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) { @@ -232,6 +233,11 @@ static inline struct device *nd_pfn_create(struct nd_region *nd_region) { return NULL; } + +static inline int nd_pfn_validate(struct nd_pfn *nd_pfn) +{ + return -ENODEV; +} #endif struct nd_region *to_nd_region(struct device *dev); diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c index f708d63709a5..3fd7d0d81a47 100644 --- a/drivers/nvdimm/pfn_devs.c +++ b/drivers/nvdimm/pfn_devs.c @@ -228,7 +228,7 @@ struct device *nd_pfn_create(struct nd_region *nd_region) return dev; } -static int nd_pfn_validate(struct nd_pfn *nd_pfn) +int nd_pfn_validate(struct nd_pfn *nd_pfn) { struct nd_namespace_common *ndns = nd_pfn->ndns; struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; @@ -286,10 +286,10 @@ static int nd_pfn_validate(struct nd_pfn *nd_pfn) */ offset = le64_to_cpu(pfn_sb->dataoff); nsio = to_nd_namespace_io(&ndns->dev); - if ((nsio->res.start + offset) & (ND_PFN_ALIGN - 1)) { + if (nsio->res.start & ND_PFN_MASK) { dev_err(&nd_pfn->dev, - "init failed: %s with offset %#llx not section aligned\n", - dev_name(&ndns->dev), offset); + "init failed: %s not section aligned\n", + dev_name(&ndns->dev)); return -EBUSY; } else if (offset >= resource_size(&nsio->res)) { dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", @@ -299,6 +299,7 @@ static int nd_pfn_validate(struct nd_pfn *nd_pfn) return 0; } +EXPORT_SYMBOL(nd_pfn_validate); int nd_pfn_probe(struct nd_namespace_common *ndns, void *drvdata) { diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 20bf122328da..2f885e5d9c36 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -21,18 +21,24 @@ #include #include #include +#include #include +#include #include #include #include +#include "pfn.h" #include "nd.h" struct pmem_device { struct request_queue *pmem_queue; struct gendisk *pmem_disk; + struct nd_namespace_common *ndns; /* One contiguous memory region per device */ phys_addr_t phys_addr; + /* when non-zero this device is hosting a 'pfn' instance */ + phys_addr_t data_offset; void __pmem *virt_addr; size_t size; }; @@ -44,7 +50,7 @@ static void pmem_do_bvec(struct pmem_device *pmem, struct page *page, sector_t sector) { void *mem = kmap_atomic(page); - size_t pmem_off = sector << 9; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; void __pmem *pmem_addr = pmem->virt_addr + pmem_off; if (rw == READ) { @@ -95,16 +101,23 @@ static long pmem_direct_access(struct block_device *bdev, sector_t sector, void __pmem **kaddr, unsigned long *pfn) { struct pmem_device *pmem = bdev->bd_disk->private_data; - size_t offset = sector << 9; - - if (!pmem) - return -ENODEV; + resource_size_t offset = sector * 512 + pmem->data_offset; + resource_size_t size; + + if (pmem->data_offset) { + /* + * Limit the direct_access() size to what is covered by + * the memmap + */ + size = (pmem->size - offset) & ~ND_PFN_MASK; + } else + size = pmem->size - offset; /* FIXME convert DAX to comprehend that this mapping has a lifetime */ *kaddr = pmem->virt_addr + offset; *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT; - return pmem->size - offset; + return size; } static const struct block_device_operations pmem_fops = { @@ -144,13 +157,16 @@ static struct pmem_device *pmem_alloc(struct device *dev, static void pmem_detach_disk(struct pmem_device *pmem) { + if (!pmem->pmem_disk) + return; + del_gendisk(pmem->pmem_disk); put_disk(pmem->pmem_disk); blk_cleanup_queue(pmem->pmem_queue); } -static int pmem_attach_disk(struct nd_namespace_common *ndns, - struct pmem_device *pmem) +static int pmem_attach_disk(struct device *dev, + struct nd_namespace_common *ndns, struct pmem_device *pmem) { struct gendisk *disk; @@ -177,8 +193,8 @@ static int pmem_attach_disk(struct nd_namespace_common *ndns, disk->queue = pmem->pmem_queue; disk->flags = GENHD_FL_EXT_DEVT; nvdimm_namespace_disk_name(ndns, disk->disk_name); - disk->driverfs_dev = &ndns->dev; - set_capacity(disk, pmem->size >> 9); + disk->driverfs_dev = dev; + set_capacity(disk, (pmem->size - pmem->data_offset) / 512); pmem->pmem_disk = disk; add_disk(disk); @@ -207,6 +223,154 @@ static int pmem_rw_bytes(struct nd_namespace_common *ndns, return 0; } +static int nd_pfn_init(struct nd_pfn *nd_pfn) +{ + struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL); + struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev); + struct nd_namespace_common *ndns = nd_pfn->ndns; + struct nd_region *nd_region; + unsigned long npfns; + phys_addr_t offset; + u64 checksum; + int rc; + + if (!pfn_sb) + return -ENOMEM; + + nd_pfn->pfn_sb = pfn_sb; + rc = nd_pfn_validate(nd_pfn); + if (rc == 0 || rc == -EBUSY) + return rc; + + /* section alignment for simple hotplug */ + if (nvdimm_namespace_capacity(ndns) < ND_PFN_ALIGN + || pmem->phys_addr & ND_PFN_MASK) + return -ENODEV; + + nd_region = to_nd_region(nd_pfn->dev.parent); + if (nd_region->ro) { + dev_info(&nd_pfn->dev, + "%s is read-only, unable to init metadata\n", + dev_name(&nd_region->dev)); + goto err; + } + + memset(pfn_sb, 0, sizeof(*pfn_sb)); + npfns = (pmem->size - SZ_8K) / SZ_4K; + /* + * Note, we use 64 here for the standard size of struct page, + * debugging options may cause it to be larger in which case the + * implementation will limit the pfns advertised through + * ->direct_access() to those that are included in the memmap. + */ + if (nd_pfn->mode == PFN_MODE_PMEM) + offset = ALIGN(SZ_8K + 64 * npfns, PMD_SIZE); + else if (nd_pfn->mode == PFN_MODE_RAM) + offset = SZ_8K; + else + goto err; + + npfns = (pmem->size - offset) / SZ_4K; + pfn_sb->mode = cpu_to_le32(nd_pfn->mode); + pfn_sb->dataoff = cpu_to_le64(offset); + pfn_sb->npfns = cpu_to_le64(npfns); + memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN); + memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); + pfn_sb->version_major = cpu_to_le16(1); + checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); + pfn_sb->checksum = cpu_to_le64(checksum); + + rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb)); + if (rc) + goto err; + + return 0; + err: + nd_pfn->pfn_sb = NULL; + kfree(pfn_sb); + return -ENXIO; +} + +static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct pmem_device *pmem; + + /* free pmem disk */ + pmem = dev_get_drvdata(&nd_pfn->dev); + pmem_detach_disk(pmem); + + /* release nd_pfn resources */ + kfree(nd_pfn->pfn_sb); + nd_pfn->pfn_sb = NULL; + + return 0; +} + +static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns) +{ + struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); + struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim); + struct device *dev = &nd_pfn->dev; + struct vmem_altmap *altmap; + struct nd_region *nd_region; + struct nd_pfn_sb *pfn_sb; + struct pmem_device *pmem; + phys_addr_t offset; + int rc; + + if (!nd_pfn->uuid || !nd_pfn->ndns) + return -ENODEV; + + nd_region = to_nd_region(dev->parent); + rc = nd_pfn_init(nd_pfn); + if (rc) + return rc; + + if (PAGE_SIZE != SZ_4K) { + dev_err(dev, "only supported on systems with 4K PAGE_SIZE\n"); + return -ENXIO; + } + if (nsio->res.start & ND_PFN_MASK) { + dev_err(dev, "%s not memory hotplug section aligned\n", + dev_name(&ndns->dev)); + return -ENXIO; + } + + pfn_sb = nd_pfn->pfn_sb; + offset = le64_to_cpu(pfn_sb->dataoff); + nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode); + if (nd_pfn->mode == PFN_MODE_RAM) { + if (offset != SZ_8K) + return -EINVAL; + nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); + altmap = NULL; + } else { + rc = -ENXIO; + goto err; + } + + /* establish pfn range for lookup, and switch to direct map */ + pmem = dev_get_drvdata(dev); + memunmap_pmem(dev, pmem->virt_addr); + pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, &nsio->res); + if (IS_ERR(pmem->virt_addr)) { + rc = PTR_ERR(pmem->virt_addr); + goto err; + } + + /* attach pmem disk in "pfn-mode" */ + pmem->data_offset = offset; + rc = pmem_attach_disk(dev, ndns, pmem); + if (rc) + goto err; + + return rc; + err: + nvdimm_namespace_detach_pfn(ndns); + return rc; +} + static int nd_pmem_probe(struct device *dev) { struct nd_region *nd_region = to_nd_region(dev->parent); @@ -223,16 +387,27 @@ static int nd_pmem_probe(struct device *dev) if (IS_ERR(pmem)) return PTR_ERR(pmem); + pmem->ndns = ndns; dev_set_drvdata(dev, pmem); ndns->rw_bytes = pmem_rw_bytes; if (is_nd_btt(dev)) return nvdimm_namespace_attach_btt(ndns); - if (nd_btt_probe(ndns, pmem) == 0) + if (is_nd_pfn(dev)) + return nvdimm_namespace_attach_pfn(ndns); + + if (nd_btt_probe(ndns, pmem) == 0) { /* we'll come back as btt-pmem */ return -ENXIO; - return pmem_attach_disk(ndns, pmem); + } + + if (nd_pfn_probe(ndns, pmem) == 0) { + /* we'll come back as pfn-pmem */ + return -ENXIO; + } + + return pmem_attach_disk(dev, ndns, pmem); } static int nd_pmem_remove(struct device *dev) @@ -240,7 +415,9 @@ static int nd_pmem_remove(struct device *dev) struct pmem_device *pmem = dev_get_drvdata(dev); if (is_nd_btt(dev)) - nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns); + nvdimm_namespace_detach_btt(pmem->ndns); + else if (is_nd_pfn(dev)) + nvdimm_namespace_detach_pfn(pmem->ndns); else pmem_detach_disk(pmem); diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild index 99e70f0729be..38b00ecb2ed5 100644 --- a/tools/testing/nvdimm/Kbuild +++ b/tools/testing/nvdimm/Kbuild @@ -2,6 +2,7 @@ ldflags-y += --wrap=ioremap_wc ldflags-y += --wrap=memremap ldflags-y += --wrap=devm_ioremap_nocache ldflags-y += --wrap=devm_memremap +ldflags-y += --wrap=devm_memunmap ldflags-y += --wrap=ioremap_nocache ldflags-y += --wrap=iounmap ldflags-y += --wrap=memunmap diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 179d2289f3a8..b7251314bbc0 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -108,6 +108,19 @@ void *__wrap_memremap(resource_size_t offset, size_t size, } EXPORT_SYMBOL(__wrap_memremap); +void __wrap_devm_memunmap(struct device *dev, void *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return devm_memunmap(dev, addr); +} +EXPORT_SYMBOL(__wrap_devm_memunmap); + void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) { return __nfit_test_ioremap(offset, size, ioremap_nocache); -- cgit v1.2.3-70-g09d2 From 004f1afbe199e6ab20805b95aefd83ccd24bc5c7 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 24 Aug 2015 19:20:23 -0400 Subject: libnvdimm, pmem: direct map legacy pmem by default The expectation is that the legacy / non-standard pmem discovery method (e820 type-12) will only ever be used to describe small quantities of persistent memory. Larger capacities will be described via the ACPI NFIT. When "allocate struct page from pmem" support is added this default policy can be overridden by assigning a legacy pmem namespace to a pfn device, however this would be only be necessary if a platform used the legacy mechanism to define a very large range. Cc: Christoph Hellwig Signed-off-by: Dan Williams --- drivers/nvdimm/e820.c | 1 + drivers/nvdimm/namespace_devs.c | 35 ++++++++++++++++++++++++++++++----- drivers/nvdimm/nd.h | 2 ++ drivers/nvdimm/pmem.c | 15 ++++++++++++--- drivers/nvdimm/region_devs.c | 1 + include/linux/libnvdimm.h | 4 ++++ 6 files changed, 50 insertions(+), 8 deletions(-) (limited to 'drivers/nvdimm') diff --git a/drivers/nvdimm/e820.c b/drivers/nvdimm/e820.c index 1b5743ad92db..8282db2ef99e 100644 --- a/drivers/nvdimm/e820.c +++ b/drivers/nvdimm/e820.c @@ -49,6 +49,7 @@ static int e820_pmem_probe(struct platform_device *pdev) ndr_desc.res = p; ndr_desc.attr_groups = e820_pmem_region_attribute_groups; ndr_desc.numa_node = NUMA_NO_NODE; + set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags); if (!nvdimm_pmem_region_create(nvdimm_bus, &ndr_desc)) goto err; } diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c index 9303ca29be9b..0955b2cb10fe 100644 --- a/drivers/nvdimm/namespace_devs.c +++ b/drivers/nvdimm/namespace_devs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "nd-core.h" #include "nd.h" @@ -76,11 +77,32 @@ static bool is_namespace_io(struct device *dev) return dev ? dev->type == &namespace_io_device_type : false; } +bool pmem_should_map_pages(struct device *dev) +{ + struct nd_region *nd_region = to_nd_region(dev->parent); + + if (!IS_ENABLED(CONFIG_ZONE_DEVICE)) + return false; + + if (!test_bit(ND_REGION_PAGEMAP, &nd_region->flags)) + return false; + + if (is_nd_pfn(dev) || is_nd_btt(dev)) + return false; + +#ifdef ARCH_MEMREMAP_PMEM + return ARCH_MEMREMAP_PMEM == MEMREMAP_WB; +#else + return false; +#endif +} +EXPORT_SYMBOL(pmem_should_map_pages); + const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, char *name) { struct nd_region *nd_region = to_nd_region(ndns->dev.parent); - const char *suffix = ""; + const char *suffix = NULL; if (ndns->claim) { if (is_nd_btt(ndns->claim)) @@ -93,13 +115,16 @@ const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns, dev_name(ndns->claim)); } - if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) - sprintf(name, "pmem%d%s", nd_region->id, suffix); - else if (is_namespace_blk(&ndns->dev)) { + if (is_namespace_pmem(&ndns->dev) || is_namespace_io(&ndns->dev)) { + if (!suffix && pmem_should_map_pages(&ndns->dev)) + suffix = "m"; + sprintf(name, "pmem%d%s", nd_region->id, suffix ? suffix : ""); + } else if (is_namespace_blk(&ndns->dev)) { struct nd_namespace_blk *nsblk; nsblk = to_nd_namespace_blk(&ndns->dev); - sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, suffix); + sprintf(name, "ndblk%d.%d%s", nd_region->id, nsblk->id, + suffix ? suffix : ""); } else { return NULL; } diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index 83e5d0935012..417e521d299c 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -100,6 +100,7 @@ struct nd_region { struct ida ns_ida; struct ida btt_ida; struct ida pfn_ida; + unsigned long flags; struct device *ns_seed; struct device *btt_seed; struct device *pfn_seed; @@ -276,4 +277,5 @@ static inline bool nd_iostat_start(struct bio *bio, unsigned long *start) void nd_iostat_end(struct bio *bio, unsigned long start); resource_size_t nd_namespace_blk_validate(struct nd_namespace_blk *nsblk); const u8 *nd_dev_to_uuid(struct device *dev); +bool pmem_should_map_pages(struct device *dev); #endif /* __ND_H__ */ diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 2f885e5d9c36..c5ae2e579288 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -148,9 +148,18 @@ static struct pmem_device *pmem_alloc(struct device *dev, return ERR_PTR(-EBUSY); } - pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, pmem->size); - if (!pmem->virt_addr) - return ERR_PTR(-ENXIO); + if (pmem_should_map_pages(dev)) { + void *addr = devm_memremap_pages(dev, res); + + if (IS_ERR(addr)) + return addr; + pmem->virt_addr = (void __pmem *) addr; + } else { + pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr, + pmem->size); + if (!pmem->virt_addr) + return ERR_PTR(-ENXIO); + } return pmem; } diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c index da4338154ad2..529f3f02e7b2 100644 --- a/drivers/nvdimm/region_devs.c +++ b/drivers/nvdimm/region_devs.c @@ -758,6 +758,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus, nd_region->provider_data = ndr_desc->provider_data; nd_region->nd_set = ndr_desc->nd_set; nd_region->num_lanes = ndr_desc->num_lanes; + nd_region->flags = ndr_desc->flags; nd_region->ro = ro; nd_region->numa_node = ndr_desc->numa_node; ida_init(&nd_region->ns_ida); diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 75e3af01ee32..3f021dc5da8c 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -31,6 +31,9 @@ enum { ND_CMD_ARS_STATUS_MAX = SZ_4K, ND_MAX_MAPPINGS = 32, + /* region flag indicating to direct-map persistent memory by default */ + ND_REGION_PAGEMAP = 0, + /* mark newly adjusted resources as requiring a label update */ DPA_RESOURCE_ADJUSTED = 1 << 0, }; @@ -91,6 +94,7 @@ struct nd_region_desc { void *provider_data; int num_lanes; int numa_node; + unsigned long flags; }; struct nvdimm_bus; -- cgit v1.2.3-70-g09d2