summaryrefslogtreecommitdiff
path: root/drivers/cxl/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/cxl/pci.c')
-rw-r--r--drivers/cxl/pci.c383
1 files changed, 241 insertions, 142 deletions
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index 8dc91fd3396a..8a7267d116b7 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -1,14 +1,16 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/moduleparam.h>
#include <linux/module.h>
+#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/io.h>
#include "cxlmem.h"
-#include "pci.h"
+#include "cxlpci.h"
#include "cxl.h"
/**
@@ -35,6 +37,20 @@
/* CXL 2.0 - 8.2.8.4 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
+/*
+ * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
+ * dictate how long to wait for the mailbox to become ready. The new
+ * field allows the device to tell software the amount of time to wait
+ * before mailbox ready. This field per the spec theoretically allows
+ * for up to 255 seconds. 255 seconds is unreasonably long, its longer
+ * than the maximum SATA port link recovery wait. Default to 60 seconds
+ * until someone builds a CXL device that needs more time in practice.
+ */
+static unsigned short mbox_ready_timeout = 60;
+module_param(mbox_ready_timeout, ushort, 0644);
+MODULE_PARM_DESC(mbox_ready_timeout,
+ "seconds to wait for mailbox ready / memory active status");
+
static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
const unsigned long start = jiffies;
@@ -57,14 +73,16 @@ static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
return 0;
}
-static void cxl_pci_mbox_timeout(struct cxl_dev_state *cxlds,
- struct cxl_mbox_cmd *mbox_cmd)
-{
- struct device *dev = cxlds->dev;
+#define cxl_err(dev, status, msg) \
+ dev_err_ratelimited(dev, msg ", device state %s%s\n", \
+ status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
+ status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
- dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
- mbox_cmd->opcode, mbox_cmd->size_in);
-}
+#define cxl_cmd_err(dev, cmd, status, msg) \
+ dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
+ (cmd)->opcode, \
+ status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
+ status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
/**
* __cxl_pci_mbox_send_cmd() - Execute a mailbox command
@@ -118,7 +136,11 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
/* #1 */
if (cxl_doorbell_busy(cxlds)) {
- dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
+ u64 md_status =
+ readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+ cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
+ "mailbox queue busy");
return -EBUSY;
}
@@ -144,7 +166,9 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
/* #5 */
rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
if (rc == -ETIMEDOUT) {
- cxl_pci_mbox_timeout(cxlds, mbox_cmd);
+ u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+
+ cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
return rc;
}
@@ -182,98 +206,13 @@ static int __cxl_pci_mbox_send_cmd(struct cxl_dev_state *cxlds,
return 0;
}
-/**
- * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
- * @cxlds: The device state to gain access to.
- *
- * Context: Any context. Takes the mbox_mutex.
- * Return: 0 if exclusive access was acquired.
- */
-static int cxl_pci_mbox_get(struct cxl_dev_state *cxlds)
-{
- struct device *dev = cxlds->dev;
- u64 md_status;
- int rc;
-
- mutex_lock_io(&cxlds->mbox_mutex);
-
- /*
- * XXX: There is some amount of ambiguity in the 2.0 version of the spec
- * around the mailbox interface ready (8.2.8.5.1.1). The purpose of the
- * bit is to allow firmware running on the device to notify the driver
- * that it's ready to receive commands. It is unclear if the bit needs
- * to be read for each transaction mailbox, ie. the firmware can switch
- * it on and off as needed. Second, there is no defined timeout for
- * mailbox ready, like there is for the doorbell interface.
- *
- * Assumptions:
- * 1. The firmware might toggle the Mailbox Interface Ready bit, check
- * it for every command.
- *
- * 2. If the doorbell is clear, the firmware should have first set the
- * Mailbox Interface Ready bit. Therefore, waiting for the doorbell
- * to be ready is sufficient.
- */
- rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
- if (rc) {
- dev_warn(dev, "Mailbox interface not ready\n");
- goto out;
- }
-
- md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
- if (!(md_status & CXLMDEV_MBOX_IF_READY && CXLMDEV_READY(md_status))) {
- dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
- rc = -EBUSY;
- goto out;
- }
-
- /*
- * Hardware shouldn't allow a ready status but also have failure bits
- * set. Spit out an error, this should be a bug report
- */
- rc = -EFAULT;
- if (md_status & CXLMDEV_DEV_FATAL) {
- dev_err(dev, "mbox: reported ready, but fatal\n");
- goto out;
- }
- if (md_status & CXLMDEV_FW_HALT) {
- dev_err(dev, "mbox: reported ready, but halted\n");
- goto out;
- }
- if (CXLMDEV_RESET_NEEDED(md_status)) {
- dev_err(dev, "mbox: reported ready, but reset needed\n");
- goto out;
- }
-
- /* with lock held */
- return 0;
-
-out:
- mutex_unlock(&cxlds->mbox_mutex);
- return rc;
-}
-
-/**
- * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
- * @cxlds: The device state to communicate with.
- *
- * Context: Any context. Expects mbox_mutex to be held.
- */
-static void cxl_pci_mbox_put(struct cxl_dev_state *cxlds)
-{
- mutex_unlock(&cxlds->mbox_mutex);
-}
-
static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *cmd)
{
int rc;
- rc = cxl_pci_mbox_get(cxlds);
- if (rc)
- return rc;
-
+ mutex_lock_io(&cxlds->mbox_mutex);
rc = __cxl_pci_mbox_send_cmd(cxlds, cmd);
- cxl_pci_mbox_put(cxlds);
+ mutex_unlock(&cxlds->mbox_mutex);
return rc;
}
@@ -281,6 +220,34 @@ static int cxl_pci_mbox_send(struct cxl_dev_state *cxlds, struct cxl_mbox_cmd *c
static int cxl_pci_setup_mailbox(struct cxl_dev_state *cxlds)
{
const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
+ unsigned long timeout;
+ u64 md_status;
+
+ timeout = jiffies + mbox_ready_timeout * HZ;
+ do {
+ md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+ if (md_status & CXLMDEV_MBOX_IF_READY)
+ break;
+ if (msleep_interruptible(100))
+ break;
+ } while (!time_after(jiffies, timeout));
+
+ if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
+ cxl_err(cxlds->dev, md_status,
+ "timeout awaiting mailbox ready");
+ return -ETIMEDOUT;
+ }
+
+ /*
+ * A command may be in flight from a previous driver instance,
+ * think kexec, do one doorbell wait so that
+ * __cxl_pci_mbox_send_cmd() can assume that it is the only
+ * source for future doorbell busy events.
+ */
+ if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
+ cxl_err(cxlds->dev, md_status, "timeout awaiting mailbox idle");
+ return -ETIMEDOUT;
+ }
cxlds->mbox_send = cxl_pci_mbox_send;
cxlds->payload_size =
@@ -400,75 +367,182 @@ static int cxl_map_regs(struct cxl_dev_state *cxlds, struct cxl_register_map *ma
return 0;
}
-static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
- struct cxl_register_map *map)
+static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
+ struct cxl_register_map *map)
{
- map->block_offset =
- ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
- map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
- map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
+ int rc;
+
+ rc = cxl_find_regblock(pdev, type, map);
+ if (rc)
+ return rc;
+
+ rc = cxl_map_regblock(pdev, map);
+ if (rc)
+ return rc;
+
+ rc = cxl_probe_regs(pdev, map);
+ cxl_unmap_regblock(pdev, map);
+
+ return rc;
}
-/**
- * cxl_find_regblock() - Locate register blocks by type
- * @pdev: The CXL PCI device to enumerate.
- * @type: Register Block Indicator id
- * @map: Enumeration output, clobbered on error
- *
- * Return: 0 if register block enumerated, negative error code otherwise
- *
- * A CXL DVSEC may point to one or more register blocks, search for them
- * by @type.
- */
-static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
- struct cxl_register_map *map)
+static int wait_for_valid(struct cxl_dev_state *cxlds)
{
- u32 regloc_size, regblocks;
- int regloc, i;
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec, rc;
+ u32 val;
- regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
- PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
- if (!regloc)
- return -ENXIO;
+ /*
+ * Memory_Info_Valid: When set, indicates that the CXL Range 1 Size high
+ * and Size Low registers are valid. Must be set within 1 second of
+ * deassertion of reset to CXL device. Likely it is already set by the
+ * time this runs, but otherwise give a 1.5 second timeout in case of
+ * clock skew.
+ */
+ rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+ if (rc)
+ return rc;
+
+ if (val & CXL_DVSEC_MEM_INFO_VALID)
+ return 0;
+
+ msleep(1500);
+
+ rc = pci_read_config_dword(pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &val);
+ if (rc)
+ return rc;
- pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
- regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+ if (val & CXL_DVSEC_MEM_INFO_VALID)
+ return 0;
- regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
- regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
+ return -ETIMEDOUT;
+}
+
+/*
+ * Wait up to @mbox_ready_timeout for the device to report memory
+ * active.
+ */
+static int wait_for_media_ready(struct cxl_dev_state *cxlds)
+{
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ bool active = false;
+ u64 md_status;
+ int rc, i;
+
+ rc = wait_for_valid(cxlds);
+ if (rc)
+ return rc;
- for (i = 0; i < regblocks; i++, regloc += 8) {
- u32 reg_lo, reg_hi;
+ for (i = mbox_ready_timeout; i; i--) {
+ u32 temp;
+ int rc;
- pci_read_config_dword(pdev, regloc, &reg_lo);
- pci_read_config_dword(pdev, regloc + 4, &reg_hi);
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(0), &temp);
+ if (rc)
+ return rc;
- cxl_decode_regblock(reg_lo, reg_hi, map);
+ active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
+ if (active)
+ break;
+ msleep(1000);
+ }
- if (map->reg_type == type)
- return 0;
+ if (!active) {
+ dev_err(&pdev->dev,
+ "timeout awaiting memory active after %d seconds\n",
+ mbox_ready_timeout);
+ return -ETIMEDOUT;
}
- return -ENODEV;
+ md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
+ if (!CXLMDEV_READY(md_status))
+ return -EIO;
+
+ return 0;
}
-static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
- struct cxl_register_map *map)
+static int cxl_dvsec_ranges(struct cxl_dev_state *cxlds)
{
- int rc;
+ struct cxl_endpoint_dvsec_info *info = &cxlds->info;
+ struct pci_dev *pdev = to_pci_dev(cxlds->dev);
+ int d = cxlds->cxl_dvsec;
+ int hdm_count, rc, i;
+ u16 cap, ctrl;
- rc = cxl_find_regblock(pdev, type, map);
+ if (!d)
+ return -ENXIO;
+
+ rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
if (rc)
return rc;
- rc = cxl_map_regblock(pdev, map);
+ rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
if (rc)
return rc;
- rc = cxl_probe_regs(pdev, map);
- cxl_unmap_regblock(pdev, map);
+ if (!(cap & CXL_DVSEC_MEM_CAPABLE))
+ return -ENXIO;
- return rc;
+ /*
+ * It is not allowed by spec for MEM.capable to be set and have 0 legacy
+ * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
+ * driver is for a spec defined class code which must be CXL.mem
+ * capable, there is no point in continuing to enable CXL.mem.
+ */
+ hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
+ if (!hdm_count || hdm_count > 2)
+ return -EINVAL;
+
+ rc = wait_for_valid(cxlds);
+ if (rc)
+ return rc;
+
+ info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
+
+ for (i = 0; i < hdm_count; i++) {
+ u64 base, size;
+ u32 temp;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
+ if (rc)
+ return rc;
+
+ size = (u64)temp << 32;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
+ if (rc)
+ return rc;
+
+ size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
+ if (rc)
+ return rc;
+
+ base = (u64)temp << 32;
+
+ rc = pci_read_config_dword(
+ pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
+ if (rc)
+ return rc;
+
+ base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;
+
+ info->dvsec_range[i] = (struct range) {
+ .start = base,
+ .end = base + size - 1
+ };
+
+ if (size)
+ info->ranges++;
+ }
+
+ return 0;
}
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
@@ -493,6 +567,15 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(cxlds))
return PTR_ERR(cxlds);
+ cxlds->serial = pci_get_dsn(pdev);
+ cxlds->cxl_dvsec = pci_find_dvsec_capability(
+ pdev, PCI_DVSEC_VENDOR_ID_CXL, CXL_DVSEC_PCIE_DEVICE);
+ if (!cxlds->cxl_dvsec)
+ dev_warn(&pdev->dev,
+ "Device DVSEC not present, skip CXL.mem init\n");
+
+ cxlds->wait_media_ready = wait_for_media_ready;
+
rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
if (rc)
return rc;
@@ -501,6 +584,17 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ /*
+ * If the component registers can't be found, the cxl_pci driver may
+ * still be useful for management functions so don't return an error.
+ */
+ cxlds->component_reg_phys = CXL_RESOURCE_NONE;
+ rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
+ if (rc)
+ dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
+
+ cxlds->component_reg_phys = cxl_regmap_to_base(pdev, &map);
+
rc = cxl_pci_setup_mailbox(cxlds);
if (rc)
return rc;
@@ -517,6 +611,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (rc)
return rc;
+ rc = cxl_dvsec_ranges(cxlds);
+ if (rc)
+ dev_warn(&pdev->dev,
+ "Failed to get DVSEC range information (%d)\n", rc);
+
cxlmd = devm_cxl_add_memdev(cxlds);
if (IS_ERR(cxlmd))
return PTR_ERR(cxlmd);