diff options
Diffstat (limited to 'drivers/cxl/pci.c')
-rw-r--r-- | drivers/cxl/pci.c | 383 |
1 files changed, 241 insertions, 142 deletions
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c index 8dc91fd3396a..8a7267d116b7 100644 --- a/drivers/cxl/pci.c +++ b/drivers/cxl/pci.c @@ -1,14 +1,16 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright(c) 2020 Intel Corporation. All rights reserved. */ #include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/moduleparam.h> #include <linux/module.h> +#include <linux/delay.h> #include <linux/sizes.h> #include <linux/mutex.h> #include <linux/list.h> #include <linux/pci.h> #include <linux/io.h> #include "cxlmem.h" -#include "pci.h" +#include "cxlpci.h" #include "cxl.h" /** @@ -35,6 +37,20 @@ /* CXL 2.0 - 8.2.8.4 */ #define CXL_MAILBOX_TIMEOUT_MS (2 * HZ) +/* + * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to + * dictate how long to wait for the mailbox to become ready. The new + * field allows the device to tell software the amount of time to wait + * before mailbox ready. This field per the spec theoretically allows + * for up to 255 seconds. 255 seconds is unreasonably long, its longer + * than the maximum SATA port link recovery wait. Default to 60 seconds + * until someone builds a CXL device that needs more time in practice. + */ +static unsigned short mbox_ready_timeout = 60; +module_param(mbox_ready_timeout, ushort, 0644); +MODULE_PARM_DESC(mbox_ready_timeout, + "seconds to wait for mailbox ready / memory active status"); + static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) { const unsigned long start = jiffies; @@ -57,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds) return 0; } -static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds, - struct cxl_mbox_cmd *mbox_cmd) -{ - struct device *dev = cxlds->dev; +#define cxl_err(dev, status, msg) \ + dev_err_ratelimited(dev, msg ", device state %s%s\n", \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? 
" firmware-halt" : "") - dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n", - mbox_cmd->opcode, mbox_cmd->size_in); -} +#define cxl_cmd_err(dev, cmd, status, msg) \ + dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \ + (cmd)->opcode, \ + status & CXLMDEV_DEV_FATAL ? " fatal" : "", \ + status & CXLMDEV_FW_HALT ? " firmware-halt" : "") /** * __cxl_pci_mbox_send_cmd() - Execute a mailbox command @@ -118,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #1 */ if (cxl_doorbell_busy(cxlds)) { - dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n"); + u64 md_status = + readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, + "mailbox queue busy"); return -EBUSY; } @@ -144,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, /* #5 */ rc = cxl_pci_mbox_wait_for_doorbell(cxlds); if (rc == -ETIMEDOUT) { - cxl_pci_mbox_timeout(cxlds, mbox_cmd); + u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + + cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout"); return rc; } @@ -182,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds, return 0; } -/** - * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox. - * @cxlds: The device state to gain access to. - * - * Context: Any context. Takes the mbox_mutex. - * Return: 0 if exclusive access was acquired. - */ -static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds) -{ - struct device *dev = cxlds->dev; - u64 md_status; - int rc; - - mutex_lock_io(&cxlds->mbox_mutex); - - /* - * XXX: There is some amount of ambiguity in the 2.0 version of the spec - * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the - * bit is to allow firmware running on the device to notify the driver - * that it's ready to receive commands. It is unclear if the bit needs - * to be read for each transaction mailbox, ie. 
the firmware can switch - * it on and off as needed. Second, there is no defined timeout for - * mailbox ready, like there is for the doorbell interface. - * - * Assumptions: - * 1. The firmware might toggle the Mailbox Interface Ready bit, check - * it for every command. - * - * 2. If the doorbell is clear, the firmware should have first set the - * Mailbox Interface Ready bit. Therefore, waiting for the doorbell - * to be ready is sufficient. - */ - rc = cxl_pci_mbox_wait_for_doorbell(cxlds); - if (rc) { - dev_warn(dev, "Mailbox interface not ready\n"); - goto out; - } - - md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); - if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) { - dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n"); - rc = -EBUSY; - goto out; - } - - /* - * Hardware shouldn't allow a ready status but also have failure bits - * set. Spit out an error, this should be a bug report - */ - rc = -EFAULT; - if (md_status & CXLMDEV_DEV_FATAL) { - dev_err(dev, "mbox: reported ready, but fatal\n"); - goto out; - } - if (md_status & CXLMDEV_FW_HALT) { - dev_err(dev, "mbox: reported ready, but halted\n"); - goto out; - } - if (CXLMDEV_RESET_NEEDED(md_status)) { - dev_err(dev, "mbox: reported ready, but reset needed\n"); - goto out; - } - - /* with lock held */ - return 0; - -out: - mutex_unlock(&cxlds->mbox_mutex); - return rc; -} - -/** - * cxl_pci_mbox_put() - Release exclusive access to the mailbox. - * @cxlds: The device state to communicate with. - * - * Context: Any context. Expects mbox_mutex to be held. 
- */ -static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds) -{ - mutex_unlock(&cxlds->mbox_mutex); -} - static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd) { int rc; - rc = cxl_pci_mbox_get(cxlds); - if (rc) - return rc; - + mutex_lock_io(&cxlds->mbox_mutex); rc = __cxl_pci_mbox_send_cmd(cxlds, cmd); - cxl_pci_mbox_put(cxlds); + mutex_unlock(&cxlds->mbox_mutex); return rc; } @@ -281,6 +220,34 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds) { const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET); + unsigned long timeout; + u64 md_status; + + timeout = jiffies + mbox_ready_timeout * HZ; + do { + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (md_status & CXLMDEV_MBOX_IF_READY) + break; + if (msleep_interruptible(100)) + break; + } while (!time_after(jiffies, timeout)); + + if (!(md_status & CXLMDEV_MBOX_IF_READY)) { + cxl_err(cxlds->dev, md_status, + "timeout awaiting mailbox ready"); + return -ETIMEDOUT; + } + + /* + * A command may be in flight from a previous driver instance, + * think kexec, do one doorbell wait so that + * __cxl_pci_mbox_send_cmd() can assume that it is the only + * source for future doorbell busy events. 
+ */ + if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { + cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle"); + return -ETIMEDOUT; + } cxlds->mbox_send = cxl_pci_mbox_send; cxlds->payload_size = @@ -400,75 +367,182 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma return 0; } -static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi, - struct cxl_register_map *map) +static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, + struct cxl_register_map *map) { - map->block_offset = - ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK); - map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo); - map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo); + int rc; + + rc = cxl_find_regblock(pdev, type, map); + if (rc) + return rc; + + rc = cxl_map_regblock(pdev, map); + if (rc) + return rc; + + rc = cxl_probe_regs(pdev, map); + cxl_unmap_regblock(pdev, map); + + return rc; } -/** - * cxl_find_regblock() - Locate register blocks by type - * @pdev: The CXL PCI device to enumerate. - * @type: Register Block Indicator id - * @map: Enumeration output, clobbered on error - * - * Return: 0 if register block enumerated, negative error code otherwise - * - * A CXL DVSEC may point to one or more register blocks, search for them - * by @type. - */ -static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) +static int wait_for_valid(struct cxl_dev_state *cxlds) { - u32 regloc_size, regblocks; - int regloc, i; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec, rc; + u32 val; - regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL, - PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID); - if (!regloc) - return -ENXIO; + /* + * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high + * and Size Low registers are valid. Must be set within 1 second of + * deassertion of reset to CXL device. 
Likely it is already set by the + * time this runs, but otherwise give a 1.5 second timeout in case of + * clock skew. + */ + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; + + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; + + msleep(1500); + + rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val); + if (rc) + return rc; - pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size); - regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size); + if (val & CXL_DVSEC_MEM_INFO_VALID) + return 0; - regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET; - regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8; + return -ETIMEDOUT; +} + +/* + * Wait up to @mbox_ready_timeout for the device to report memory + * active. + */ +static int wait_for_media_ready(struct cxl_dev_state *cxlds) +{ + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + bool active = false; + u64 md_status; + int rc, i; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; - for (i = 0; i < regblocks; i++, regloc += 8) { - u32 reg_lo, reg_hi; + for (i = mbox_ready_timeout; i; i--) { + u32 temp; + int rc; - pci_read_config_dword(pdev, regloc, &reg_lo); - pci_read_config_dword(pdev, regloc + 4, &reg_hi); + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp); + if (rc) + return rc; - cxl_decode_regblock(reg_lo, reg_hi, map); + active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp); + if (active) + break; + msleep(1000); + } - if (map->reg_type == type) - return 0; + if (!active) { + dev_err(&pdev->dev, + "timeout awaiting memory active after %d seconds\n", + mbox_ready_timeout); + return -ETIMEDOUT; } - return -ENODEV; + md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET); + if (!CXLMDEV_READY(md_status)) + return -EIO; + + return 0; } -static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, - struct cxl_register_map *map) +static int
cxl_dvsec_ranges(struct cxl_dev_state *cxlds) { - int rc; + struct cxl_endpoint_dvsec_info *info = &cxlds->info; + struct pci_dev *pdev = to_pci_dev(cxlds->dev); + int d = cxlds->cxl_dvsec; + int hdm_count, rc, i; + u16 cap, ctrl; - rc = cxl_find_regblock(pdev, type, map); + if (!d) + return -ENXIO; + + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap); if (rc) return rc; - rc = cxl_map_regblock(pdev, map); + rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl); if (rc) return rc; - rc = cxl_probe_regs(pdev, map); - cxl_unmap_regblock(pdev, map); + if (!(cap & CXL_DVSEC_MEM_CAPABLE)) + return -ENXIO; - return rc; + /* + * It is not allowed by spec for MEM.capable to be set and have 0 legacy + * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this + * driver is for a spec defined class code which must be CXL.mem + * capable, there is no point in continuing to enable CXL.mem. + */ + hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap); + if (!hdm_count || hdm_count > 2) + return -EINVAL; + + rc = wait_for_valid(cxlds); + if (rc) + return rc; + + info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl); + + for (i = 0; i < hdm_count; i++) { + u64 base, size; + u32 temp; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp); + if (rc) + return rc; + + size = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp); + if (rc) + return rc; + + size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp); + if (rc) + return rc; + + base = (u64)temp << 32; + + rc = pci_read_config_dword( + pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp); + if (rc) + return rc; + + base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK; + + info->dvsec_range[i] = (struct range) { + .start = base, + .end = base + size - 1 + }; + + if (size) + info->ranges++; + } + + return 0; } static int cxl_pci_probe(struct pci_dev *pdev, const 
struct pci_device_id *id) @@ -493,6 +567,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (IS_ERR(cxlds)) return PTR_ERR(cxlds); + cxlds->serial = pci_get_dsn(pdev); + cxlds->cxl_dvsec = pci_find_dvsec_capability( + pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE); + if (!cxlds->cxl_dvsec) + dev_warn(&pdev->dev, + "Device DVSEC not present, skip CXL.mem init\n"); + + cxlds->wait_media_ready = wait_for_media_ready; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map); if (rc) return rc; @@ -501,6 +584,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + /* + * If the component registers can't be found, the cxl_pci driver may + * still be useful for management functions so don't return an error. + */ + cxlds->component_reg_phys = CXL_RESOURCE_NONE; + rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map); + if (rc) + dev_warn(&pdev->dev, "No component registers (%d)\n", rc); + + cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map); + rc = cxl_pci_setup_mailbox(cxlds); if (rc) return rc; @@ -517,6 +611,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (rc) return rc; + rc = cxl_dvsec_ranges(cxlds); + if (rc) + dev_warn(&pdev->dev, + "Failed to get DVSEC range information (%d)\n", rc); + cxlmd = devm_cxl_add_memdev(cxlds); if (IS_ERR(cxlmd)) return PTR_ERR(cxlmd); |