diff options
author | Dan Williams <dan.j.williams@intel.com> | 2024-03-13 00:07:36 -0700 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2024-03-13 00:07:36 -0700 |
commit | d5c0078033e2adf30e897d985fd29084ecb56b51 (patch) | |
tree | 4db9109b51457c17972fa262f1ac1bfc80aad9da /drivers/cxl | |
parent | c6c3187d66bc4e87086036266def4170742d7214 (diff) | |
parent | debdce20c4f28b7e5aa48512e7abf270a00e9051 (diff) |
Merge branch 'for-6.9/cxl-qos' into for-6.9/cxl
Pick up support for CXL "HMEM reporting" for v6.9, i.e. build an HMAT
from CXL CDAT and PCIe switch information.
Diffstat (limited to 'drivers/cxl')
-rw-r--r-- | drivers/cxl/acpi.c | 8 | ||||
-rw-r--r-- | drivers/cxl/core/cdat.c | 134 | ||||
-rw-r--r-- | drivers/cxl/core/core.h | 4 | ||||
-rw-r--r-- | drivers/cxl/core/port.c | 45 | ||||
-rw-r--r-- | drivers/cxl/core/region.c | 169 | ||||
-rw-r--r-- | drivers/cxl/cxl.h | 15 |
6 files changed, 342 insertions, 33 deletions
diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index 1a3e6aafbdcc..af5cb818f84d 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -530,13 +530,15 @@ static int get_genport_coordinates(struct device *dev, struct cxl_dport *dport) if (kstrtou32(acpi_device_uid(hb), 0, &uid)) return -EINVAL; - rc = acpi_get_genport_coordinates(uid, &dport->hb_coord); + rc = acpi_get_genport_coordinates(uid, dport->hb_coord); if (rc < 0) return rc; /* Adjust back to picoseconds from nanoseconds */ - dport->hb_coord.read_latency *= 1000; - dport->hb_coord.write_latency *= 1000; + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + dport->hb_coord[i].read_latency *= 1000; + dport->hb_coord[i].write_latency *= 1000; + } return 0; } diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c index c8737e480789..e8c066293b31 100644 --- a/drivers/cxl/core/cdat.c +++ b/drivers/cxl/core/cdat.c @@ -9,6 +9,7 @@ #include "cxlmem.h" #include "core.h" #include "cxl.h" +#include "core.h" struct dsmas_entry { struct range dpa_range; @@ -162,15 +163,22 @@ static int cxl_cdat_endpoint_process(struct cxl_port *port, static int cxl_port_perf_data_calculate(struct cxl_port *port, struct xarray *dsmas_xa) { - struct access_coordinate c; + struct access_coordinate ep_c; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; struct dsmas_entry *dent; int valid_entries = 0; unsigned long index; int rc; - rc = cxl_endpoint_get_perf_coordinates(port, &c); + rc = cxl_endpoint_get_perf_coordinates(port, &ep_c); if (rc) { - dev_dbg(&port->dev, "Failed to retrieve perf coordinates.\n"); + dev_dbg(&port->dev, "Failed to retrieve ep perf coordinates.\n"); + return rc; + } + + rc = cxl_hb_get_perf_coordinates(port, coord); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); return rc; } @@ -185,18 +193,19 @@ static int cxl_port_perf_data_calculate(struct cxl_port *port, xa_for_each(dsmas_xa, index, dent) { int qos_class; - dent->coord.read_latency = dent->coord.read_latency + - c.read_latency; - dent->coord.write_latency = dent->coord.write_latency + - c.write_latency; - dent->coord.read_bandwidth = min_t(int, c.read_bandwidth, - dent->coord.read_bandwidth); - dent->coord.write_bandwidth = min_t(int, c.write_bandwidth, - dent->coord.write_bandwidth); - + cxl_coordinates_combine(&dent->coord, &dent->coord, &ep_c); + /* + * Keeping the host bridge coordinates separate from the dsmas + * coordinates in order to allow calculation of access class + * 0 and 1 for region later. + */ + cxl_coordinates_combine(&coord[ACCESS_COORDINATE_CPU], + &coord[ACCESS_COORDINATE_CPU], + &dent->coord); dent->entries = 1; - rc = cxl_root->ops->qos_class(cxl_root, &dent->coord, 1, - &qos_class); + rc = cxl_root->ops->qos_class(cxl_root, + &coord[ACCESS_COORDINATE_CPU], + 1, &qos_class); if (rc != 1) continue; @@ -484,4 +493,101 @@ void cxl_switch_parse_cdat(struct cxl_port *port) } EXPORT_SYMBOL_NS_GPL(cxl_switch_parse_cdat, CXL); +/** + * cxl_coordinates_combine - Combine the two input coordinates + * + * @out: Output coordinate of c1 and c2 combined + * @c1: input coordinates + * @c2: input coordinates + */ +void cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2) +{ + if (c1->write_bandwidth && c2->write_bandwidth) + out->write_bandwidth = min(c1->write_bandwidth, + c2->write_bandwidth); + out->write_latency = c1->write_latency + c2->write_latency; + + if (c1->read_bandwidth && c2->read_bandwidth) + out->read_bandwidth = min(c1->read_bandwidth, + c2->read_bandwidth); + out->read_latency = c1->read_latency + c2->read_latency; +} + MODULE_IMPORT_NS(CXL); + +void cxl_region_perf_data_calculate(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled) +{ + struct cxl_memdev *cxlmd = cxled_to_memdev(cxled); + struct cxl_port *port = cxlmd->endpoint; + struct cxl_dev_state *cxlds = cxlmd->cxlds; + struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds); + struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; + struct access_coordinate coord; + struct range dpa = { + .start = cxled->dpa_res->start, + .end = cxled->dpa_res->end, + }; + struct cxl_dpa_perf *perf; + int rc; + + switch (cxlr->mode) { + case CXL_DECODER_RAM: + perf = &mds->ram_perf; + break; + case CXL_DECODER_PMEM: + perf = &mds->pmem_perf; + break; + default: + return; + } + + lockdep_assert_held(&cxl_dpa_rwsem); + + if (!range_contains(&perf->dpa_range, &dpa)) + return; + + rc = cxl_hb_get_perf_coordinates(port, hb_coord); + if (rc) { + dev_dbg(&port->dev, "Failed to retrieve hb perf coordinates.\n"); + return; + } + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + /* Pickup the host bridge coords */ + cxl_coordinates_combine(&coord, &hb_coord[i], &perf->coord); + + /* Get total bandwidth and the worst latency for the cxl region */ + cxlr->coord[i].read_latency = max_t(unsigned int, + cxlr->coord[i].read_latency, + coord.read_latency); + cxlr->coord[i].write_latency = max_t(unsigned int, + cxlr->coord[i].write_latency, + coord.write_latency); + cxlr->coord[i].read_bandwidth += coord.read_bandwidth; + cxlr->coord[i].write_bandwidth += coord.write_bandwidth; + + /* + * Convert latency to nanosec from picosec to be consistent + * with the resulting latency coordinates computed by the + * HMAT_REPORTING code. + */ + cxlr->coord[i].read_latency = + DIV_ROUND_UP(cxlr->coord[i].read_latency, 1000); + cxlr->coord[i].write_latency = + DIV_ROUND_UP(cxlr->coord[i].write_latency, 1000); + } +} + +int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, + enum access_coordinate_class access) +{ + return hmat_update_target_coordinates(nid, &cxlr->coord[access], access); +} + +bool cxl_need_node_perf_attrs_update(int nid) +{ + return !acpi_node_backed_by_real_pxm(nid); +} diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 3b64fb1b9ed0..bc5a95665aa0 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -90,4 +90,8 @@ enum cxl_poison_trace_type { long cxl_pci_get_latency(struct pci_dev *pdev); +int cxl_update_hmat_access_coordinates(int nid, struct cxl_region *cxlr, + enum access_coordinate_class access); +bool cxl_need_node_perf_attrs_update(int nid); + #endif /* __CXL_CORE_H__ */ diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index e59d9d37aa65..9ab542e7af65 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -822,6 +822,7 @@ static struct cxl_port *__devm_cxl_add_port(struct device *host, */ port->reg_map = cxlds->reg_map; port->reg_map.host = &port->dev; + cxlmd->endpoint = port; } else if (parent_dport) { rc = dev_set_name(dev, "port%d", port->id); if (rc) @@ -1374,7 +1375,6 @@ int cxl_endpoint_autoremove(struct cxl_memdev *cxlmd, struct cxl_port *endpoint) get_device(host); get_device(&endpoint->dev); - cxlmd->endpoint = endpoint; cxlmd->depth = endpoint->depth; return devm_add_action_or_reset(dev, delete_endpoint, cxlmd); } @@ -2096,18 +2096,36 @@ bool schedule_cxl_memdev_detach(struct cxl_memdev *cxlmd) } EXPORT_SYMBOL_NS_GPL(schedule_cxl_memdev_detach, CXL); -static void combine_coordinates(struct access_coordinate *c1, - struct access_coordinate *c2) +/** + * cxl_hb_get_perf_coordinates - Retrieve performance numbers between initiator + * and host bridge + * + * @port: endpoint cxl_port + * @coord: output access coordinates + * + * Return: errno on failure, 0 on success. + */ +int cxl_hb_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord) { - if (c2->write_bandwidth) - c1->write_bandwidth = min(c1->write_bandwidth, - c2->write_bandwidth); - c1->write_latency += c2->write_latency; + struct cxl_port *iter = port; + struct cxl_dport *dport; + + if (!is_cxl_endpoint(port)) + return -EINVAL; + + dport = iter->parent_dport; + while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { + iter = to_cxl_port(iter->dev.parent); + dport = iter->parent_dport; + } + + coord[ACCESS_COORDINATE_LOCAL] = + dport->hb_coord[ACCESS_COORDINATE_LOCAL]; + coord[ACCESS_COORDINATE_CPU] = + dport->hb_coord[ACCESS_COORDINATE_CPU]; - if (c2->read_bandwidth) - c1->read_bandwidth = min(c1->read_bandwidth, - c2->read_bandwidth); - c1->read_latency += c2->read_latency; + return 0; } /** @@ -2143,7 +2161,7 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, * nothing to gather. */ while (iter && !is_cxl_root(to_cxl_port(iter->dev.parent))) { - combine_coordinates(&c, &dport->sw_coord); + cxl_coordinates_combine(&c, &c, &dport->sw_coord); c.write_latency += dport->link_latency; c.read_latency += dport->link_latency; @@ -2151,9 +2169,6 @@ int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, dport = iter->parent_dport; } - /* Augment with the generic port (host bridge) perf data */ - combine_coordinates(&c, &dport->hb_coord); - /* Get the calculated PCI paths bandwidth */ pdev = to_pci_dev(port->uport_dev->parent); bw = pcie_bandwidth_available(pdev, NULL, NULL, NULL); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 4c7fd2d5cccb..5c186e0a39b9 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -4,6 +4,7 @@ #include <linux/genalloc.h> #include <linux/device.h> #include <linux/module.h> +#include <linux/memory.h> #include <linux/slab.h> #include <linux/uuid.h> #include <linux/sort.h> @@ -30,6 +31,108 @@ static struct cxl_region *to_cxl_region(struct device *dev); +#define __ACCESS_ATTR_RO(_level, _name) { \ + .attr = { .name = __stringify(_name), .mode = 0444 }, \ + .show = _name##_access##_level##_show, \ +} + +#define ACCESS_DEVICE_ATTR_RO(level, name) \ + struct device_attribute dev_attr_access##level##_##name = __ACCESS_ATTR_RO(level, name) + +#define ACCESS_ATTR_RO(level, attrib) \ +static ssize_t attrib##_access##level##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *buf) \ +{ \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (cxlr->coord[level].attrib == 0) \ + return -ENOENT; \ + \ + return sysfs_emit(buf, "%u\n", cxlr->coord[level].attrib); \ +} \ +static ACCESS_DEVICE_ATTR_RO(level, attrib) + +ACCESS_ATTR_RO(0, read_bandwidth); +ACCESS_ATTR_RO(0, read_latency); +ACCESS_ATTR_RO(0, write_bandwidth); +ACCESS_ATTR_RO(0, write_latency); + +#define ACCESS_ATTR_DECLARE(level, attrib) \ + (&dev_attr_access##level##_##attrib.attr) + +static struct attribute *access0_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(0, read_bandwidth), + ACCESS_ATTR_DECLARE(0, write_bandwidth), + ACCESS_ATTR_DECLARE(0, read_latency), + ACCESS_ATTR_DECLARE(0, write_latency), + NULL +}; + +ACCESS_ATTR_RO(1, read_bandwidth); +ACCESS_ATTR_RO(1, read_latency); +ACCESS_ATTR_RO(1, write_bandwidth); +ACCESS_ATTR_RO(1, write_latency); + +static struct attribute *access1_coordinate_attrs[] = { + ACCESS_ATTR_DECLARE(1, read_bandwidth), + ACCESS_ATTR_DECLARE(1, write_bandwidth), + ACCESS_ATTR_DECLARE(1, read_latency), + ACCESS_ATTR_DECLARE(1, write_latency), + NULL +}; + +#define ACCESS_VISIBLE(level) \ +static umode_t cxl_region_access##level##_coordinate_visible( \ + struct kobject *kobj, struct attribute *a, int n) \ +{ \ + struct device *dev = kobj_to_dev(kobj); \ + struct cxl_region *cxlr = to_cxl_region(dev); \ + \ + if (a == &dev_attr_access##level##_read_latency.attr && \ + cxlr->coord[level].read_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_latency.attr && \ + cxlr->coord[level].write_latency == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_read_bandwidth.attr && \ + cxlr->coord[level].read_bandwidth == 0) \ + return 0; \ + \ + if (a == &dev_attr_access##level##_write_bandwidth.attr && \ + cxlr->coord[level].write_bandwidth == 0) \ + return 0; \ + \ + return a->mode; \ +} + +ACCESS_VISIBLE(0); +ACCESS_VISIBLE(1); + +static const struct attribute_group cxl_region_access0_coordinate_group = { + .name = "access0", + .attrs = access0_coordinate_attrs, + .is_visible = cxl_region_access0_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access0_group(void) +{ + return &cxl_region_access0_coordinate_group; +} + +static const struct attribute_group cxl_region_access1_coordinate_group = { + .name = "access1", + .attrs = access1_coordinate_attrs, + .is_visible = cxl_region_access1_coordinate_visible, +}; + +static const struct attribute_group *get_cxl_region_access1_group(void) +{ + return &cxl_region_access1_coordinate_group; +} + static ssize_t uuid_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -1752,6 +1855,8 @@ static int cxl_region_attach(struct cxl_region *cxlr, return -EINVAL; } + cxl_region_perf_data_calculate(cxlr, cxled); + if (test_bit(CXL_REGION_F_AUTO, &cxlr->flags)) { int i; @@ -2067,6 +2172,8 @@ static const struct attribute_group *region_groups[] = { &cxl_base_attribute_group, &cxl_region_group, &cxl_region_target_group, + &cxl_region_access0_coordinate_group, + &cxl_region_access1_coordinate_group, NULL, }; @@ -2120,6 +2227,7 @@ static void unregister_region(void *_cxlr) struct cxl_region_params *p = &cxlr->params; int i; + unregister_memory_notifier(&cxlr->memory_notifier); device_del(&cxlr->dev); /* @@ -2164,6 +2272,63 @@ static struct cxl_region *cxl_region_alloc(struct cxl_root_decoder *cxlrd, int i return cxlr; } +static bool cxl_region_update_coordinates(struct cxl_region *cxlr, int nid) +{ + int cset = 0; + int rc; + + for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) { + if (cxlr->coord[i].read_bandwidth) { + rc = 0; + if (cxl_need_node_perf_attrs_update(nid)) + node_set_perf_attrs(nid, &cxlr->coord[i], i); + else + rc = cxl_update_hmat_access_coordinates(nid, cxlr, i); + + if (rc == 0) + cset++; + } + } + + if (!cset) + return false; + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access0_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access0 group\n"); + + rc = sysfs_update_group(&cxlr->dev.kobj, get_cxl_region_access1_group()); + if (rc) + dev_dbg(&cxlr->dev, "Failed to update access1 group\n"); + + return true; +} + +static int cxl_region_perf_attrs_callback(struct notifier_block *nb, + unsigned long action, void *arg) +{ + struct cxl_region *cxlr = container_of(nb, struct cxl_region, + memory_notifier); + struct cxl_region_params *p = &cxlr->params; + struct cxl_endpoint_decoder *cxled = p->targets[0]; + struct cxl_decoder *cxld = &cxled->cxld; + struct memory_notify *mnb = arg; + int nid = mnb->status_change_nid; + int region_nid; + + if (nid == NUMA_NO_NODE || action != MEM_ONLINE) + return NOTIFY_DONE; + + region_nid = phys_to_target_node(cxld->hpa_range.start); + if (nid != region_nid) + return NOTIFY_DONE; + + if (!cxl_region_update_coordinates(cxlr, nid)) + return NOTIFY_DONE; + + return NOTIFY_OK; +} + /** * devm_cxl_add_region - Adds a region to a decoder * @cxlrd: root decoder @@ -2211,6 +2376,10 @@ static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd, if (rc) goto err; + cxlr->memory_notifier.notifier_call = cxl_region_perf_attrs_callback; + cxlr->memory_notifier.priority = CXL_CALLBACK_PRI; + register_memory_notifier(&cxlr->memory_notifier); + rc = devm_add_action_or_reset(port->uport_dev, unregister_region, cxlr); if (rc) return ERR_PTR(rc); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 003feebab79b..534e25e2f0a4 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -6,6 +6,7 @@ #include <linux/libnvdimm.h> #include <linux/bitfield.h> +#include <linux/notifier.h> #include <linux/bitops.h> #include <linux/log2.h> #include <linux/node.h> @@ -517,6 +518,8 @@ struct cxl_region_params { * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge * @flags: Region state flags * @params: active + config params for the region + * @coord: QoS access coordinates for the region + * @memory_notifier: notifier for setting the access coordinates to node */ struct cxl_region { struct device dev; @@ -527,6 +530,8 @@ struct cxl_region { struct cxl_pmem_region *cxlr_pmem; unsigned long flags; struct cxl_region_params params; + struct access_coordinate coord[ACCESS_COORDINATE_MAX]; + struct notifier_block memory_notifier; }; struct cxl_nvdimm_bridge { @@ -671,7 +676,7 @@ struct cxl_dport { struct cxl_port *port; struct cxl_regs regs; struct access_coordinate sw_coord; - struct access_coordinate hb_coord; + struct access_coordinate hb_coord[ACCESS_COORDINATE_MAX]; long link_latency; }; @@ -879,9 +884,17 @@ void cxl_switch_parse_cdat(struct cxl_port *port); int cxl_endpoint_get_perf_coordinates(struct cxl_port *port, struct access_coordinate *coord); +int cxl_hb_get_perf_coordinates(struct cxl_port *port, + struct access_coordinate *coord); +void cxl_region_perf_data_calculate(struct cxl_region *cxlr, + struct cxl_endpoint_decoder *cxled); void cxl_memdev_update_perf(struct cxl_memdev *cxlmd); +void cxl_coordinates_combine(struct access_coordinate *out, + struct access_coordinate *c1, + struct access_coordinate *c2); + /* * Unit test builds overrides this to __weak, find the 'strong' version * of these symbols in tools/testing/cxl/. |