From 7fac54b93ad13e5e7ac237af33eb2a0940eaeea0 Mon Sep 17 00:00:00 2001 From: Rong Tao Date: Wed, 21 Dec 2022 20:36:27 +0800 Subject: atm: uapi: fix spelling typos in comments Fix the typo of 'Unsuported' in atmbr2684.h Signed-off-by: Rong Tao Link: https://lore.kernel.org/r/tencent_F1354BEC925C65EA357E741E91DF2044E805@qq.com Signed-off-by: Jakub Kicinski --- include/uapi/linux/atmbr2684.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/atmbr2684.h b/include/uapi/linux/atmbr2684.h index a9e2250cd720..d47c47d06f11 100644 --- a/include/uapi/linux/atmbr2684.h +++ b/include/uapi/linux/atmbr2684.h @@ -38,7 +38,7 @@ */ #define BR2684_ENCAPS_VC (0) /* VC-mux */ #define BR2684_ENCAPS_LLC (1) -#define BR2684_ENCAPS_AUTODETECT (2) /* Unsuported */ +#define BR2684_ENCAPS_AUTODETECT (2) /* Unsupported */ /* * Is this VC bridged or routed? -- cgit v1.3.1 From b9e05399d9273c8c066e73db1e6e85364003030c Mon Sep 17 00:00:00 2001 From: Si-Wei Liu Date: Mon, 10 Oct 2022 10:27:03 -0700 Subject: vdpa: merge functionally duplicated dev_features attributes We can merge VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES with VDPA_ATTR_DEV_FEATURES which is functionally equivalent. While at it, tweak the comment in header file to make user provioned device features distinguished from those supported by the parent mgmtdev device: the former of which can be inherited as a whole from the latter, or can be a subset of the latter if explicitly specified. Signed-off-by: Si-Wei Liu Message-Id: <1665422823-18364-1-git-send-email-si-wei.liu@oracle.com> Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vdpa/vdpa.c | 2 +- include/uapi/linux/vdpa.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/uapi') diff --git a/drivers/vdpa/vdpa.c b/drivers/vdpa/vdpa.c index febdc99b51a7..41ed56362992 100644 --- a/drivers/vdpa/vdpa.c +++ b/drivers/vdpa/vdpa.c @@ -855,7 +855,7 @@ static int vdpa_dev_net_config_fill(struct vdpa_device *vdev, struct sk_buff *ms features_device = vdev->config->get_device_features(vdev); - if (nla_put_u64_64bit(msg, VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, features_device, + if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_FEATURES, features_device, VDPA_ATTR_PAD)) return -EMSGSIZE; diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h index 9bd79235c875..54b649ab0f22 100644 --- a/include/uapi/linux/vdpa.h +++ b/include/uapi/linux/vdpa.h @@ -53,11 +53,9 @@ enum vdpa_attr { VDPA_ATTR_DEV_VENDOR_ATTR_NAME, /* string */ VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, /* u64 */ + /* virtio features that are provisioned to the vDPA device */ VDPA_ATTR_DEV_FEATURES, /* u64 */ - /* virtio features that are supported by the vDPA device */ - VDPA_ATTR_VDPA_DEV_SUPPORTED_FEATURES, /* u64 */ - /* new attributes must be added above here */ VDPA_ATTR_MAX, }; -- cgit v1.3.1 From 4b83e99ee7092df37a5cf292fde976ebc475ea63 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 4 Jan 2023 14:44:13 -0700 Subject: Revert "pktcdvd: remove driver." MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f40eb99897af665f11858dd7b56edcb62c3f3c67. There are apparently still users out there of this driver. While we'd love to remove it to ease the maintenance burden, let's reinstate it for now until better (userspace) solutions can be developed. Link: https://lore.kernel.org/lkml/20230104190115.ceglfefco475ev6c@pali/ Reported-by: Pali Rohár Signed-off-by: Jens Axboe --- Documentation/ABI/testing/debugfs-pktcdvd | 18 + Documentation/ABI/testing/sysfs-class-pktcdvd | 97 + MAINTAINERS | 7 + drivers/block/Kconfig | 43 + drivers/block/Makefile | 1 + drivers/block/pktcdvd.c | 2944 +++++++++++++++++++++++++ include/linux/pktcdvd.h | 197 ++ include/uapi/linux/pktcdvd.h | 112 + 8 files changed, 3419 insertions(+) create mode 100644 Documentation/ABI/testing/debugfs-pktcdvd create mode 100644 Documentation/ABI/testing/sysfs-class-pktcdvd create mode 100644 drivers/block/pktcdvd.c create mode 100644 include/linux/pktcdvd.h create mode 100644 include/uapi/linux/pktcdvd.h (limited to 'include/uapi') diff --git a/Documentation/ABI/testing/debugfs-pktcdvd b/Documentation/ABI/testing/debugfs-pktcdvd new file mode 100644 index 000000000000..f6f65a4faea0 --- /dev/null +++ b/Documentation/ABI/testing/debugfs-pktcdvd @@ -0,0 +1,18 @@ +What: /sys/kernel/debug/pktcdvd/pktcdvd[0-7] +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + +The pktcdvd module (packet writing driver) creates +these files in debugfs: + +/sys/kernel/debug/pktcdvd/pktcdvd[0-7]/ + + ==== ====== ==================================== + info 0444 Lots of driver statistics and infos. + ==== ====== ==================================== + +Example:: + + cat /sys/kernel/debug/pktcdvd/pktcdvd0/info diff --git a/Documentation/ABI/testing/sysfs-class-pktcdvd b/Documentation/ABI/testing/sysfs-class-pktcdvd new file mode 100644 index 000000000000..ba1ce626591d --- /dev/null +++ b/Documentation/ABI/testing/sysfs-class-pktcdvd @@ -0,0 +1,97 @@ +sysfs interface +--------------- +The pktcdvd module (packet writing driver) creates the following files in the +sysfs: ( is in the format major:minor) + +What: /sys/class/pktcdvd/add +What: /sys/class/pktcdvd/remove +What: /sys/class/pktcdvd/device_map +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + + ========== ============================================== + add (WO) Write a block device id (major:minor) to + create a new pktcdvd device and map it to the + block device. + + remove (WO) Write the pktcdvd device id (major:minor) + to remove the pktcdvd device. + + device_map (RO) Shows the device mapping in format: + pktcdvd[0-7] + ========== ============================================== + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/dev +What: /sys/class/pktcdvd/pktcdvd[0-7]/uevent +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + dev: (RO) Device id + + uevent: (WO) To send a uevent + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_started +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/packets_finished +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_written +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/kb_read_gather +What: /sys/class/pktcdvd/pktcdvd[0-7]/stat/reset +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + packets_started: (RO) Number of started packets. + + packets_finished: (RO) Number of finished packets. + + kb_written: (RO) kBytes written. + + kb_read: (RO) kBytes read. + + kb_read_gather: (RO) kBytes read to fill write packets. + + reset: (WO) Write any value to it to reset + pktcdvd device statistic values, like + bytes read/written. + + +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/size +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_off +What: /sys/class/pktcdvd/pktcdvd[0-7]/write_queue/congestion_on +Date: Oct. 2006 +KernelVersion: 2.6.20 +Contact: Thomas Maier +Description: + ============== ================================================ + size (RO) Contains the size of the bio write queue. + + congestion_off (RW) If bio write queue size is below this mark, + accept new bio requests from the block layer. + + congestion_on (RW) If bio write queue size is higher as this + mark, do no longer accept bio write requests + from the block layer and wait till the pktcdvd + device has processed enough bio's so that bio + write queue size is below congestion off mark. + A value of <= 0 disables congestion control. + ============== ================================================ + + +Example: +-------- +To use the pktcdvd sysfs interface directly, you can do:: + + # create a new pktcdvd device mapped to /dev/hdc + echo "22:0" >/sys/class/pktcdvd/add + cat /sys/class/pktcdvd/device_map + # assuming device pktcdvd0 was created, look at stat's + cat /sys/class/pktcdvd/pktcdvd0/stat/kb_written + # print the device id of the mapped block device + fgrep pktcdvd0 /sys/class/pktcdvd/device_map + # remove device, using pktcdvd0 device id 253:0 + echo "253:0" >/sys/class/pktcdvd/remove diff --git a/MAINTAINERS b/MAINTAINERS index d53b3a6cdc67..3ef137fea4f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16520,6 +16520,13 @@ S: Supported F: Documentation/devicetree/bindings/input/pine64,pinephone-keyboard.yaml F: drivers/input/keyboard/pinephone-keyboard.c +PKTCDVD DRIVER +M: linux-block@vger.kernel.org +S: Orphan +F: drivers/block/pktcdvd.c +F: include/linux/pktcdvd.h +F: include/uapi/linux/pktcdvd.h + PLANTOWER PMS7003 AIR POLLUTION SENSOR DRIVER M: Tomasz Duszynski S: Maintained diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index a2184b428493..a41145d52de9 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -285,6 +285,49 @@ config BLK_DEV_RAM_SIZE The default value is 4096 kilobytes. Only change this if you know what you are doing. +config CDROM_PKTCDVD + tristate "Packet writing on CD/DVD media (DEPRECATED)" + depends on !UML + depends on SCSI + select CDROM + help + Note: This driver is deprecated and will be removed from the + kernel in the near future! + + If you have a CDROM/DVD drive that supports packet writing, say + Y to include support. It should work with any MMC/Mt Fuji + compliant ATAPI or SCSI drive, which is just about any newer + DVD/CD writer. + + Currently only writing to CD-RW, DVD-RW, DVD+RW and DVDRAM discs + is possible. + DVD-RW disks must be in restricted overwrite mode. + + See the file + for further information on the use of this driver. + + To compile this driver as a module, choose M here: the + module will be called pktcdvd. + +config CDROM_PKTCDVD_BUFFERS + int "Free buffers for data gathering" + depends on CDROM_PKTCDVD + default "8" + help + This controls the maximum number of active concurrent packets. More + concurrent packets can increase write performance, but also require + more memory. Each concurrent packet will require approximately 64Kb + of non-swappable kernel memory, memory which will be allocated when + a disc is opened for writing. + +config CDROM_PKTCDVD_WCACHE + bool "Enable write caching" + depends on CDROM_PKTCDVD + help + If enabled, write caching will be set for the CD-R/W device. For now + this option is dangerous unless the CD-RW media is known good, as we + don't do deferred write error handling yet. + config ATA_OVER_ETH tristate "ATA over Ethernet support" depends on NET diff --git a/drivers/block/Makefile b/drivers/block/Makefile index 962ee65d8ca3..101612cba303 100644 --- a/drivers/block/Makefile +++ b/drivers/block/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_AMIGA_Z2RAM) += z2ram.o obj-$(CONFIG_N64CART) += n64cart.o obj-$(CONFIG_BLK_DEV_RAM) += brd.o obj-$(CONFIG_BLK_DEV_LOOP) += loop.o +obj-$(CONFIG_CDROM_PKTCDVD) += pktcdvd.o obj-$(CONFIG_SUNVDC) += sunvdc.o obj-$(CONFIG_BLK_DEV_NBD) += nbd.o diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c new file mode 100644 index 000000000000..4cea3b08087e --- /dev/null +++ b/drivers/block/pktcdvd.c @@ -0,0 +1,2944 @@ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * Copyright (C) 2006 Thomas Maier + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-RW, DVD+RW, DVD-RW and + * DVD-RAM devices. + * + * Theory of operation: + * + * At the lowest level, there is the standard driver for the CD/DVD device, + * such as drivers/scsi/sr.c. This driver can handle read and write requests, + * but it doesn't know anything about the special restrictions that apply to + * packet writing. One restriction is that write requests must be aligned to + * packet boundaries on the physical media, and the size of a write request + * must be equal to the packet size. Another restriction is that a + * GPCMD_FLUSH_CACHE command has to be issued to the drive before a read + * command, if the previous command was a write. + * + * The purpose of the packet writing driver is to hide these restrictions from + * higher layers, such as file systems, and present a block device that can be + * randomly read and written using 2kB-sized blocks. + * + * The lowest layer in the packet writing driver is the packet I/O scheduler. + * Its data is defined by the struct packet_iosched and includes two bio + * queues with pending read and write requests. These queues are processed + * by the pkt_iosched_process_queue() function. The write requests in this + * queue are already properly aligned and sized. This layer is responsible for + * issuing the flush cache commands and scheduling the I/O in a good order. + * + * The next layer transforms unaligned write requests to aligned writes. This + * transformation requires reading missing pieces of data from the underlying + * block device, assembling the pieces to full packets and queuing them to the + * packet I/O scheduler. + * + * At the top layer there is a custom ->submit_bio function that forwards + * read requests directly to the iosched queue and puts write requests in the + * unaligned write queue. A kernel thread performs the necessary read + * gathering to convert the unaligned writes to aligned writes and then feeds + * them to the packet I/O scheduler. + * + *************************************************************************/ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_NAME "pktcdvd" + +#define pkt_err(pd, fmt, ...) \ + pr_err("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_notice(pd, fmt, ...) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__) +#define pkt_info(pd, fmt, ...) \ + pr_info("%s: " fmt, pd->name, ##__VA_ARGS__) + +#define pkt_dbg(level, pd, fmt, ...) \ +do { \ + if (level == 2 && PACKET_DEBUG >= 2) \ + pr_notice("%s: %s():" fmt, \ + pd->name, __func__, ##__VA_ARGS__); \ + else if (level == 1 && PACKET_DEBUG >= 1) \ + pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__); \ +} while (0) + +#define MAX_SPEED 0xffff + +static DEFINE_MUTEX(pktcdvd_mutex); +static struct pktcdvd_device *pkt_devs[MAX_WRITERS]; +static struct proc_dir_entry *pkt_proc; +static int pktdev_major; +static int write_congestion_on = PKT_WRITE_CONGESTION_ON; +static int write_congestion_off = PKT_WRITE_CONGESTION_OFF; +static struct mutex ctl_mutex; /* Serialize open/close/setup/teardown */ +static mempool_t psd_pool; +static struct bio_set pkt_bio_set; + +static struct class *class_pktcdvd = NULL; /* /sys/class/pktcdvd */ +static struct dentry *pkt_debugfs_root = NULL; /* /sys/kernel/debug/pktcdvd */ + +/* forward declaration */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev); +static int pkt_remove_dev(dev_t pkt_dev); +static int pkt_seq_show(struct seq_file *m, void *p); + +static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd) +{ + return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1); +} + +/********************************************************** + * sysfs interface for pktcdvd + * by (C) 2006 Thomas Maier + + /sys/class/pktcdvd/pktcdvd[0-7]/ + stat/reset + stat/packets_started + stat/packets_finished + stat/kb_written + stat/kb_read + stat/kb_read_gather + write_queue/size + write_queue/congestion_off + write_queue/congestion_on + **********************************************************/ + +static ssize_t packets_started_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_started); +} +static DEVICE_ATTR_RO(packets_started); + +static ssize_t packets_finished_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.pkt_ended); +} +static DEVICE_ATTR_RO(packets_finished); + +static ssize_t kb_written_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_w >> 1); +} +static DEVICE_ATTR_RO(kb_written); + +static ssize_t kb_read_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_r >> 1); +} +static DEVICE_ATTR_RO(kb_read); + +static ssize_t kb_read_gather_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + return sysfs_emit(buf, "%lu\n", pd->stats.secs_rg >> 1); +} +static DEVICE_ATTR_RO(kb_read_gather); + +static ssize_t reset_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + + if (len > 0) { + pd->stats.pkt_started = 0; + pd->stats.pkt_ended = 0; + pd->stats.secs_w = 0; + pd->stats.secs_rg = 0; + pd->stats.secs_r = 0; + } + return len; +} +static DEVICE_ATTR_WO(reset); + +static struct attribute *pkt_stat_attrs[] = { + &dev_attr_packets_finished.attr, + &dev_attr_packets_started.attr, + &dev_attr_kb_read.attr, + &dev_attr_kb_written.attr, + &dev_attr_kb_read_gather.attr, + &dev_attr_reset.attr, + NULL, +}; + +static const struct attribute_group pkt_stat_group = { + .name = "stat", + .attrs = pkt_stat_attrs, +}; + +static ssize_t size_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->bio_queue_size); + spin_unlock(&pd->lock); + return n; +} +static DEVICE_ATTR_RO(size); + +static void init_write_congestion_marks(int* lo, int* hi) +{ + if (*hi > 0) { + *hi = max(*hi, 500); + *hi = min(*hi, 1000000); + if (*lo <= 0) + *lo = *hi - 100; + else { + *lo = min(*lo, *hi - 100); + *lo = max(*lo, 100); + } + } else { + *hi = -1; + *lo = -1; + } +} + +static ssize_t congestion_off_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_off); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_off_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_off = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_off); + +static ssize_t congestion_on_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int n; + + spin_lock(&pd->lock); + n = sysfs_emit(buf, "%d\n", pd->write_congestion_on); + spin_unlock(&pd->lock); + return n; +} + +static ssize_t congestion_on_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t len) +{ + struct pktcdvd_device *pd = dev_get_drvdata(dev); + int val; + + if (sscanf(buf, "%d", &val) == 1) { + spin_lock(&pd->lock); + pd->write_congestion_on = val; + init_write_congestion_marks(&pd->write_congestion_off, + &pd->write_congestion_on); + spin_unlock(&pd->lock); + } + return len; +} +static DEVICE_ATTR_RW(congestion_on); + +static struct attribute *pkt_wq_attrs[] = { + &dev_attr_congestion_on.attr, + &dev_attr_congestion_off.attr, + &dev_attr_size.attr, + NULL, +}; + +static const struct attribute_group pkt_wq_group = { + .name = "write_queue", + .attrs = pkt_wq_attrs, +}; + +static const struct attribute_group *pkt_groups[] = { + &pkt_stat_group, + &pkt_wq_group, + NULL, +}; + +static void pkt_sysfs_dev_new(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) { + pd->dev = device_create_with_groups(class_pktcdvd, NULL, + MKDEV(0, 0), pd, pkt_groups, + "%s", pd->name); + if (IS_ERR(pd->dev)) + pd->dev = NULL; + } +} + +static void pkt_sysfs_dev_remove(struct pktcdvd_device *pd) +{ + if (class_pktcdvd) + device_unregister(pd->dev); +} + + +/******************************************************************** + /sys/class/pktcdvd/ + add map block device + remove unmap packet dev + device_map show mappings + *******************************************************************/ + +static void class_pktcdvd_release(struct class *cls) +{ + kfree(cls); +} + +static ssize_t device_map_show(struct class *c, struct class_attribute *attr, + char *data) +{ + int n = 0; + int idx; + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + for (idx = 0; idx < MAX_WRITERS; idx++) { + struct pktcdvd_device *pd = pkt_devs[idx]; + if (!pd) + continue; + n += sprintf(data+n, "%s %u:%u %u:%u\n", + pd->name, + MAJOR(pd->pkt_dev), MINOR(pd->pkt_dev), + MAJOR(pd->bdev->bd_dev), + MINOR(pd->bdev->bd_dev)); + } + mutex_unlock(&ctl_mutex); + return n; +} +static CLASS_ATTR_RO(device_map); + +static ssize_t add_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + /* pkt_setup_dev() expects caller to hold reference to self */ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + pkt_setup_dev(MKDEV(major, minor), NULL); + + module_put(THIS_MODULE); + + return count; + } + + return -EINVAL; +} +static CLASS_ATTR_WO(add); + +static ssize_t remove_store(struct class *c, struct class_attribute *attr, + const char *buf, size_t count) +{ + unsigned int major, minor; + if (sscanf(buf, "%u:%u", &major, &minor) == 2) { + pkt_remove_dev(MKDEV(major, minor)); + return count; + } + return -EINVAL; +} +static CLASS_ATTR_WO(remove); + +static struct attribute *class_pktcdvd_attrs[] = { + &class_attr_add.attr, + &class_attr_remove.attr, + &class_attr_device_map.attr, + NULL, +}; +ATTRIBUTE_GROUPS(class_pktcdvd); + +static int pkt_sysfs_init(void) +{ + int ret = 0; + + /* + * create control files in sysfs + * /sys/class/pktcdvd/... + */ + class_pktcdvd = kzalloc(sizeof(*class_pktcdvd), GFP_KERNEL); + if (!class_pktcdvd) + return -ENOMEM; + class_pktcdvd->name = DRIVER_NAME; + class_pktcdvd->owner = THIS_MODULE; + class_pktcdvd->class_release = class_pktcdvd_release; + class_pktcdvd->class_groups = class_pktcdvd_groups; + ret = class_register(class_pktcdvd); + if (ret) { + kfree(class_pktcdvd); + class_pktcdvd = NULL; + pr_err("failed to create class pktcdvd\n"); + return ret; + } + return 0; +} + +static void pkt_sysfs_cleanup(void) +{ + if (class_pktcdvd) + class_destroy(class_pktcdvd); + class_pktcdvd = NULL; +} + +/******************************************************************** + entries in debugfs + + /sys/kernel/debug/pktcdvd[0-7]/ + info + + *******************************************************************/ + +static int pkt_debugfs_seq_show(struct seq_file *m, void *p) +{ + return pkt_seq_show(m, p); +} + +static int pkt_debugfs_fops_open(struct inode *inode, struct file *file) +{ + return single_open(file, pkt_debugfs_seq_show, inode->i_private); +} + +static const struct file_operations debug_fops = { + .open = pkt_debugfs_fops_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .owner = THIS_MODULE, +}; + +static void pkt_debugfs_dev_new(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + pd->dfs_d_root = debugfs_create_dir(pd->name, pkt_debugfs_root); + if (!pd->dfs_d_root) + return; + + pd->dfs_f_info = debugfs_create_file("info", 0444, + pd->dfs_d_root, pd, &debug_fops); +} + +static void pkt_debugfs_dev_remove(struct pktcdvd_device *pd) +{ + if (!pkt_debugfs_root) + return; + debugfs_remove(pd->dfs_f_info); + debugfs_remove(pd->dfs_d_root); + pd->dfs_f_info = NULL; + pd->dfs_d_root = NULL; +} + +static void pkt_debugfs_init(void) +{ + pkt_debugfs_root = debugfs_create_dir(DRIVER_NAME, NULL); +} + +static void pkt_debugfs_cleanup(void) +{ + debugfs_remove(pkt_debugfs_root); + pkt_debugfs_root = NULL; +} + +/* ----------------------------------------------------------*/ + + +static void pkt_bio_finished(struct pktcdvd_device *pd) +{ + BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0); + if (atomic_dec_and_test(&pd->cdrw.pending_bios)) { + pkt_dbg(2, pd, "queue empty\n"); + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); + } +} + +/* + * Allocate a packet_data struct + */ +static struct packet_data *pkt_alloc_packet_data(int frames) +{ + int i; + struct packet_data *pkt; + + pkt = kzalloc(sizeof(struct packet_data), GFP_KERNEL); + if (!pkt) + goto no_pkt; + + pkt->frames = frames; + pkt->w_bio = bio_kmalloc(frames, GFP_KERNEL); + if (!pkt->w_bio) + goto no_bio; + + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) { + pkt->pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO); + if (!pkt->pages[i]) + goto no_page; + } + + spin_lock_init(&pkt->lock); + bio_list_init(&pkt->orig_bios); + + for (i = 0; i < frames; i++) { + pkt->r_bios[i] = bio_kmalloc(1, GFP_KERNEL); + if (!pkt->r_bios[i]) + goto no_rd_bio; + } + + return pkt; + +no_rd_bio: + for (i = 0; i < frames; i++) + kfree(pkt->r_bios[i]); +no_page: + for (i = 0; i < frames / FRAMES_PER_PAGE; i++) + if (pkt->pages[i]) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); +no_bio: + kfree(pkt); +no_pkt: + return NULL; +} + +/* + * Free a packet_data struct + */ +static void pkt_free_packet_data(struct packet_data *pkt) +{ + int i; + + for (i = 0; i < pkt->frames; i++) + kfree(pkt->r_bios[i]); + for (i = 0; i < pkt->frames / FRAMES_PER_PAGE; i++) + __free_page(pkt->pages[i]); + kfree(pkt->w_bio); + kfree(pkt); +} + +static void pkt_shrink_pktlist(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + BUG_ON(!list_empty(&pd->cdrw.pkt_active_list)); + + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_free_list, list) { + pkt_free_packet_data(pkt); + } + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); +} + +static int pkt_grow_pktlist(struct pktcdvd_device *pd, int nr_packets) +{ + struct packet_data *pkt; + + BUG_ON(!list_empty(&pd->cdrw.pkt_free_list)); + + while (nr_packets > 0) { + pkt = pkt_alloc_packet_data(pd->settings.size >> 2); + if (!pkt) { + pkt_shrink_pktlist(pd); + return 0; + } + pkt->id = nr_packets; + pkt->pd = pd; + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + nr_packets--; + } + return 1; +} + +static inline struct pkt_rb_node *pkt_rbtree_next(struct pkt_rb_node *node) +{ + struct rb_node *n = rb_next(&node->rb_node); + if (!n) + return NULL; + return rb_entry(n, struct pkt_rb_node, rb_node); +} + +static void pkt_rbtree_erase(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + rb_erase(&node->rb_node, &pd->bio_queue); + mempool_free(node, &pd->rb_pool); + pd->bio_queue_size--; + BUG_ON(pd->bio_queue_size < 0); +} + +/* + * Find the first node in the pd->bio_queue rb tree with a starting sector >= s. + */ +static struct pkt_rb_node *pkt_rbtree_find(struct pktcdvd_device *pd, sector_t s) +{ + struct rb_node *n = pd->bio_queue.rb_node; + struct rb_node *next; + struct pkt_rb_node *tmp; + + if (!n) { + BUG_ON(pd->bio_queue_size > 0); + return NULL; + } + + for (;;) { + tmp = rb_entry(n, struct pkt_rb_node, rb_node); + if (s <= tmp->bio->bi_iter.bi_sector) + next = n->rb_left; + else + next = n->rb_right; + if (!next) + break; + n = next; + } + + if (s > tmp->bio->bi_iter.bi_sector) { + tmp = pkt_rbtree_next(tmp); + if (!tmp) + return NULL; + } + BUG_ON(s > tmp->bio->bi_iter.bi_sector); + return tmp; +} + +/* + * Insert a node into the pd->bio_queue rb tree. + */ +static void pkt_rbtree_insert(struct pktcdvd_device *pd, struct pkt_rb_node *node) +{ + struct rb_node **p = &pd->bio_queue.rb_node; + struct rb_node *parent = NULL; + sector_t s = node->bio->bi_iter.bi_sector; + struct pkt_rb_node *tmp; + + while (*p) { + parent = *p; + tmp = rb_entry(parent, struct pkt_rb_node, rb_node); + if (s < tmp->bio->bi_iter.bi_sector) + p = &(*p)->rb_left; + else + p = &(*p)->rb_right; + } + rb_link_node(&node->rb_node, parent, p); + rb_insert_color(&node->rb_node, &pd->bio_queue); + pd->bio_queue_size++; +} + +/* + * Send a packet_command to the underlying block device and + * wait for completion. + */ +static int pkt_generic_packet(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + struct request_queue *q = bdev_get_queue(pd->bdev); + struct scsi_cmnd *scmd; + struct request *rq; + int ret = 0; + + rq = scsi_alloc_request(q, (cgc->data_direction == CGC_DATA_WRITE) ? + REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); + if (IS_ERR(rq)) + return PTR_ERR(rq); + scmd = blk_mq_rq_to_pdu(rq); + + if (cgc->buflen) { + ret = blk_rq_map_kern(q, rq, cgc->buffer, cgc->buflen, + GFP_NOIO); + if (ret) + goto out; + } + + scmd->cmd_len = COMMAND_SIZE(cgc->cmd[0]); + memcpy(scmd->cmnd, cgc->cmd, CDROM_PACKET_SIZE); + + rq->timeout = 60*HZ; + if (cgc->quiet) + rq->rq_flags |= RQF_QUIET; + + blk_execute_rq(rq, false); + if (scmd->result) + ret = -EIO; +out: + blk_mq_free_request(rq); + return ret; +} + +static const char *sense_key_string(__u8 index) +{ + static const char * const info[] = { + "No sense", "Recovered error", "Not ready", + "Medium error", "Hardware error", "Illegal request", + "Unit attention", "Data protect", "Blank check", + }; + + return index < ARRAY_SIZE(info) ? info[index] : "INVALID"; +} + +/* + * A generic sense dump / resolve mechanism should be implemented across + * all ATAPI + SCSI devices. + */ +static void pkt_dump_sense(struct pktcdvd_device *pd, + struct packet_command *cgc) +{ + struct scsi_sense_hdr *sshdr = cgc->sshdr; + + if (sshdr) + pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n", + CDROM_PACKET_SIZE, cgc->cmd, + sshdr->sense_key, sshdr->asc, sshdr->ascq, + sense_key_string(sshdr->sense_key)); + else + pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd); +} + +/* + * flush the drive cache to media + */ +static int pkt_flush_cache(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_FLUSH_CACHE; + cgc.quiet = 1; + + /* + * the IMMED bit -- we default to not setting it, although that + * would allow a much faster close, this is safer + */ +#if 0 + cgc.cmd[1] = 1 << 1; +#endif + return pkt_generic_packet(pd, &cgc); +} + +/* + * speed is given as the normal factor, e.g. 4 for 4x + */ +static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd, + unsigned write_speed, unsigned read_speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + int ret; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_SET_SPEED; + cgc.cmd[2] = (read_speed >> 8) & 0xff; + cgc.cmd[3] = read_speed & 0xff; + cgc.cmd[4] = (write_speed >> 8) & 0xff; + cgc.cmd[5] = write_speed & 0xff; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + pkt_dump_sense(pd, &cgc); + + return ret; +} + +/* + * Queue a bio for processing by the low-level CD device. Must be called + * from process context. + */ +static void pkt_queue_bio(struct pktcdvd_device *pd, struct bio *bio) +{ + spin_lock(&pd->iosched.lock); + if (bio_data_dir(bio) == READ) + bio_list_add(&pd->iosched.read_queue, bio); + else + bio_list_add(&pd->iosched.write_queue, bio); + spin_unlock(&pd->iosched.lock); + + atomic_set(&pd->iosched.attention, 1); + wake_up(&pd->wqueue); +} + +/* + * Process the queued read/write requests. This function handles special + * requirements for CDRW drives: + * - A cache flush command must be inserted before a read request if the + * previous request was a write. + * - Switching between reading and writing is slow, so don't do it more often + * than necessary. + * - Optimize for throughput at the expense of latency. This means that streaming + * writes will never be interrupted by a read, but if the drive has to seek + * before the next write, switch to reading instead if there are any pending + * read requests. + * - Set the read speed according to current usage pattern. When only reading + * from the device, it's best to use the highest possible read speed, but + * when switching often between reading and writing, it's better to have the + * same read and write speeds. + */ +static void pkt_iosched_process_queue(struct pktcdvd_device *pd) +{ + + if (atomic_read(&pd->iosched.attention) == 0) + return; + atomic_set(&pd->iosched.attention, 0); + + for (;;) { + struct bio *bio; + int reads_queued, writes_queued; + + spin_lock(&pd->iosched.lock); + reads_queued = !bio_list_empty(&pd->iosched.read_queue); + writes_queued = !bio_list_empty(&pd->iosched.write_queue); + spin_unlock(&pd->iosched.lock); + + if (!reads_queued && !writes_queued) + break; + + if (pd->iosched.writing) { + int need_write_seek = 1; + spin_lock(&pd->iosched.lock); + bio = bio_list_peek(&pd->iosched.write_queue); + spin_unlock(&pd->iosched.lock); + if (bio && (bio->bi_iter.bi_sector == + pd->iosched.last_write)) + need_write_seek = 0; + if (need_write_seek && reads_queued) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + pkt_dbg(2, pd, "write, waiting\n"); + break; + } + pkt_flush_cache(pd); + pd->iosched.writing = 0; + } + } else { + if (!reads_queued && writes_queued) { + if (atomic_read(&pd->cdrw.pending_bios) > 0) { + pkt_dbg(2, pd, "read, waiting\n"); + break; + } + pd->iosched.writing = 1; + } + } + + spin_lock(&pd->iosched.lock); + if (pd->iosched.writing) + bio = bio_list_pop(&pd->iosched.write_queue); + else + bio = bio_list_pop(&pd->iosched.read_queue); + spin_unlock(&pd->iosched.lock); + + if (!bio) + continue; + + if (bio_data_dir(bio) == READ) + pd->iosched.successive_reads += + bio->bi_iter.bi_size >> 10; + else { + pd->iosched.successive_reads = 0; + pd->iosched.last_write = bio_end_sector(bio); + } + if (pd->iosched.successive_reads >= HI_SPEED_SWITCH) { + if (pd->read_speed == pd->write_speed) { + pd->read_speed = MAX_SPEED; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } else { + if (pd->read_speed != pd->write_speed) { + pd->read_speed = pd->write_speed; + pkt_set_speed(pd, pd->write_speed, pd->read_speed); + } + } + + atomic_inc(&pd->cdrw.pending_bios); + submit_bio_noacct(bio); + } +} + +/* + * Special care is needed if the underlying block device has a small + * max_phys_segments value. + */ +static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_queue *q) +{ + if ((pd->settings.size << 9) / CD_FRAMESIZE + <= queue_max_segments(q)) { + /* + * The cdrom device can handle one segment/frame + */ + clear_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else if ((pd->settings.size << 9) / PAGE_SIZE + <= queue_max_segments(q)) { + /* + * We can handle this case at the expense of some extra memory + * copies during write operations + */ + set_bit(PACKET_MERGE_SEGS, &pd->flags); + return 0; + } else { + pkt_err(pd, "cdrom max_phys_segments too small\n"); + return -EIO; + } +} + +static void pkt_end_io_read(struct bio *bio) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n", + bio, (unsigned long long)pkt->sector, + (unsigned long long)bio->bi_iter.bi_sector, bio->bi_status); + + if (bio->bi_status) + atomic_inc(&pkt->io_errors); + bio_uninit(bio); + if (atomic_dec_and_test(&pkt->io_wait)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + pkt_bio_finished(pd); +} + +static void pkt_end_io_packet_write(struct bio *bio) +{ + struct packet_data *pkt = bio->bi_private; + struct pktcdvd_device *pd = pkt->pd; + BUG_ON(!pd); + + pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, bio->bi_status); + + pd->stats.pkt_ended++; + + bio_uninit(bio); + pkt_bio_finished(pd); + atomic_dec(&pkt->io_wait); + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); +} + +/* + * Schedule reads for the holes in a packet + */ +static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int frames_read = 0; + struct bio *bio; + int f; + char written[PACKET_MAX_SIZE]; + + BUG_ON(bio_list_empty(&pkt->orig_bios)); + + atomic_set(&pkt->io_wait, 0); + atomic_set(&pkt->io_errors, 0); + + /* + * Figure out which frames we need to read before we can write. + */ + memset(written, 0, sizeof(written)); + spin_lock(&pkt->lock); + bio_list_for_each(bio, &pkt->orig_bios) { + int first_frame = (bio->bi_iter.bi_sector - pkt->sector) / + (CD_FRAMESIZE >> 9); + int num_frames = bio->bi_iter.bi_size / CD_FRAMESIZE; + pd->stats.secs_w += num_frames * (CD_FRAMESIZE >> 9); + BUG_ON(first_frame < 0); + BUG_ON(first_frame + num_frames > pkt->frames); + for (f = first_frame; f < first_frame + num_frames; f++) + written[f] = 1; + } + spin_unlock(&pkt->lock); + + if (pkt->cache_valid) { + pkt_dbg(2, pd, "zone %llx cached\n", + (unsigned long long)pkt->sector); + goto out_account; + } + + /* + * Schedule reads for missing parts of the packet. + */ + for (f = 0; f < pkt->frames; f++) { + int p, offset; + + if (written[f]) + continue; + + bio = pkt->r_bios[f]; + bio_init(bio, pd->bdev, bio->bi_inline_vecs, 1, REQ_OP_READ); + bio->bi_iter.bi_sector = pkt->sector + f * (CD_FRAMESIZE >> 9); + bio->bi_end_io = pkt_end_io_read; + bio->bi_private = pkt; + + p = (f * CD_FRAMESIZE) / PAGE_SIZE; + offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n", + f, pkt->pages[p], offset); + if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset)) + BUG(); + + atomic_inc(&pkt->io_wait); + pkt_queue_bio(pd, bio); + frames_read++; + } + +out_account: + pkt_dbg(2, pd, "need %d frames for zone %llx\n", + frames_read, (unsigned long long)pkt->sector); + pd->stats.pkt_started++; + pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9); +} + +/* + * Find a packet matching zone, or the least recently used packet if + * there is no match. + */ +static struct packet_data *pkt_get_packet_data(struct pktcdvd_device *pd, int zone) +{ + struct packet_data *pkt; + + list_for_each_entry(pkt, &pd->cdrw.pkt_free_list, list) { + if (pkt->sector == zone || pkt->list.next == &pd->cdrw.pkt_free_list) { + list_del_init(&pkt->list); + if (pkt->sector != zone) + pkt->cache_valid = 0; + return pkt; + } + } + BUG(); + return NULL; +} + +static void pkt_put_packet_data(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + if (pkt->cache_valid) { + list_add(&pkt->list, &pd->cdrw.pkt_free_list); + } else { + list_add_tail(&pkt->list, &pd->cdrw.pkt_free_list); + } +} + +static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state state) +{ +#if PACKET_DEBUG > 1 + static const char *state_name[] = { + "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED" + }; + enum packet_data_state old_state = pkt->state; + pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n", + pkt->id, (unsigned long long)pkt->sector, + state_name[old_state], state_name[state]); +#endif + pkt->state = state; +} + +/* + * Scan the work queue to see if we can start a new packet. + * returns non-zero if any work was done. + */ +static int pkt_handle_queue(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *p; + struct bio *bio = NULL; + sector_t zone = 0; /* Suppress gcc warning */ + struct pkt_rb_node *node, *first_node; + struct rb_node *n; + + atomic_set(&pd->scan_queue, 0); + + if (list_empty(&pd->cdrw.pkt_free_list)) { + pkt_dbg(2, pd, "no pkt\n"); + return 0; + } + + /* + * Try to find a zone we are not already working on. + */ + spin_lock(&pd->lock); + first_node = pkt_rbtree_find(pd, pd->current_sector); + if (!first_node) { + n = rb_first(&pd->bio_queue); + if (n) + first_node = rb_entry(n, struct pkt_rb_node, rb_node); + } + node = first_node; + while (node) { + bio = node->bio; + zone = get_zone(bio->bi_iter.bi_sector, pd); + list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) { + if (p->sector == zone) { + bio = NULL; + goto try_next_bio; + } + } + break; +try_next_bio: + node = pkt_rbtree_next(node); + if (!node) { + n = rb_first(&pd->bio_queue); + if (n) + node = rb_entry(n, struct pkt_rb_node, rb_node); + } + if (node == first_node) + node = NULL; + } + spin_unlock(&pd->lock); + if (!bio) { + pkt_dbg(2, pd, "no bio\n"); + return 0; + } + + pkt = pkt_get_packet_data(pd, zone); + + pd->current_sector = zone + pd->settings.size; + pkt->sector = zone; + BUG_ON(pkt->frames != pd->settings.size >> 2); + pkt->write_size = 0; + + /* + * Scan work queue for bios in the same zone and link them + * to this packet. + */ + spin_lock(&pd->lock); + pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone); + while ((node = pkt_rbtree_find(pd, zone)) != NULL) { + bio = node->bio; + pkt_dbg(2, pd, "found zone=%llx\n", (unsigned long long) + get_zone(bio->bi_iter.bi_sector, pd)); + if (get_zone(bio->bi_iter.bi_sector, pd) != zone) + break; + pkt_rbtree_erase(pd, node); + spin_lock(&pkt->lock); + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += bio->bi_iter.bi_size / CD_FRAMESIZE; + spin_unlock(&pkt->lock); + } + /* check write congestion marks, and if bio_queue_size is + * below, wake up any waiters + */ + if (pd->congested && + pd->bio_queue_size <= pd->write_congestion_off) { + pd->congested = false; + wake_up_var(&pd->congested); + } + spin_unlock(&pd->lock); + + pkt->sleep_time = max(PACKET_WAIT_TIME, 1); + pkt_set_state(pkt, PACKET_WAITING_STATE); + atomic_set(&pkt->run_sm, 1); + + spin_lock(&pd->cdrw.active_list_lock); + list_add(&pkt->list, &pd->cdrw.pkt_active_list); + spin_unlock(&pd->cdrw.active_list_lock); + + return 1; +} + +/** + * bio_list_copy_data - copy contents of data buffers from one chain of bios to + * another + * @src: source bio list + * @dst: destination bio list + * + * Stops when it reaches the end of either the @src list or @dst list - that is, + * copies min(src->bi_size, dst->bi_size) bytes (or the equivalent for lists of + * bios). + */ +static void bio_list_copy_data(struct bio *dst, struct bio *src) +{ + struct bvec_iter src_iter = src->bi_iter; + struct bvec_iter dst_iter = dst->bi_iter; + + while (1) { + if (!src_iter.bi_size) { + src = src->bi_next; + if (!src) + break; + + src_iter = src->bi_iter; + } + + if (!dst_iter.bi_size) { + dst = dst->bi_next; + if (!dst) + break; + + dst_iter = dst->bi_iter; + } + + bio_copy_data_iter(dst, &dst_iter, src, &src_iter); + } +} + +/* + * Assemble a bio to write one packet and queue the bio for processing + * by the underlying block device. + */ +static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + int f; + + bio_init(pkt->w_bio, pd->bdev, pkt->w_bio->bi_inline_vecs, pkt->frames, + REQ_OP_WRITE); + pkt->w_bio->bi_iter.bi_sector = pkt->sector; + pkt->w_bio->bi_end_io = pkt_end_io_packet_write; + pkt->w_bio->bi_private = pkt; + + /* XXX: locking? */ + for (f = 0; f < pkt->frames; f++) { + struct page *page = pkt->pages[(f * CD_FRAMESIZE) / PAGE_SIZE]; + unsigned offset = (f * CD_FRAMESIZE) % PAGE_SIZE; + + if (!bio_add_page(pkt->w_bio, page, CD_FRAMESIZE, offset)) + BUG(); + } + pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt); + + /* + * Fill-in bvec with data from orig_bios. + */ + spin_lock(&pkt->lock); + bio_list_copy_data(pkt->w_bio, pkt->orig_bios.head); + + pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE); + spin_unlock(&pkt->lock); + + pkt_dbg(2, pd, "Writing %d frames for zone %llx\n", + pkt->write_size, (unsigned long long)pkt->sector); + + if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) + pkt->cache_valid = 1; + else + pkt->cache_valid = 0; + + /* Start the write request */ + atomic_set(&pkt->io_wait, 1); + pkt_queue_bio(pd, pkt->w_bio); +} + +static void pkt_finish_packet(struct packet_data *pkt, blk_status_t status) +{ + struct bio *bio; + + if (status) + pkt->cache_valid = 0; + + /* Finish all bios corresponding to this packet */ + while ((bio = bio_list_pop(&pkt->orig_bios))) { + bio->bi_status = status; + bio_endio(bio); + } +} + +static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data *pkt) +{ + pkt_dbg(2, pd, "pkt %d\n", pkt->id); + + for (;;) { + switch (pkt->state) { + case PACKET_WAITING_STATE: + if ((pkt->write_size < pkt->frames) && (pkt->sleep_time > 0)) + return; + + pkt->sleep_time = 0; + pkt_gather_data(pd, pkt); + pkt_set_state(pkt, PACKET_READ_WAIT_STATE); + break; + + case PACKET_READ_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (atomic_read(&pkt->io_errors) > 0) { + pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } else { + pkt_start_write(pd, pkt); + } + break; + + case PACKET_WRITE_WAIT_STATE: + if (atomic_read(&pkt->io_wait) > 0) + return; + + if (!pkt->w_bio->bi_status) { + pkt_set_state(pkt, PACKET_FINISHED_STATE); + } else { + pkt_set_state(pkt, PACKET_RECOVERY_STATE); + } + break; + + case PACKET_RECOVERY_STATE: + pkt_dbg(2, pd, "No recovery possible\n"); + pkt_set_state(pkt, PACKET_FINISHED_STATE); + break; + + case PACKET_FINISHED_STATE: + pkt_finish_packet(pkt, pkt->w_bio->bi_status); + return; + + default: + BUG(); + break; + } + } +} + +static void pkt_handle_packets(struct pktcdvd_device *pd) +{ + struct packet_data *pkt, *next; + + /* + * Run state machine for active packets + */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) { + atomic_set(&pkt->run_sm, 0); + pkt_run_state_machine(pd, pkt); + } + } + + /* + * Move no longer active packets to the free list + */ + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry_safe(pkt, next, &pd->cdrw.pkt_active_list, list) { + if (pkt->state == PACKET_FINISHED_STATE) { + list_del(&pkt->list); + pkt_put_packet_data(pd, pkt); + pkt_set_state(pkt, PACKET_IDLE_STATE); + atomic_set(&pd->scan_queue, 1); + } + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +static void pkt_count_states(struct pktcdvd_device *pd, int *states) +{ + struct packet_data *pkt; + int i; + + for (i = 0; i < PACKET_NUM_STATES; i++) + states[i] = 0; + + spin_lock(&pd->cdrw.active_list_lock); + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + states[pkt->state]++; + } + spin_unlock(&pd->cdrw.active_list_lock); +} + +/* + * kcdrwd is woken up when writes have been queued for one of our + * registered devices + */ +static int kcdrwd(void *foobar) +{ + struct pktcdvd_device *pd = foobar; + struct packet_data *pkt; + long min_sleep_time, residue; + + set_user_nice(current, MIN_NICE); + set_freezable(); + + for (;;) { + DECLARE_WAITQUEUE(wait, current); + + /* + * Wait until there is something to do + */ + add_wait_queue(&pd->wqueue, &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* Check if we need to run pkt_handle_queue */ + if (atomic_read(&pd->scan_queue) > 0) + goto work_to_do; + + /* Check if we need to run the state machine for some packet */ + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (atomic_read(&pkt->run_sm) > 0) + goto work_to_do; + } + + /* Check if we need to process the iosched queues */ + if (atomic_read(&pd->iosched.attention) != 0) + goto work_to_do; + + /* Otherwise, go to sleep */ + if (PACKET_DEBUG > 1) { + int states[PACKET_NUM_STATES]; + pkt_count_states(pd, states); + pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], + states[3], states[4], states[5]); + } + + min_sleep_time = MAX_SCHEDULE_TIMEOUT; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sleep_time && pkt->sleep_time < min_sleep_time) + min_sleep_time = pkt->sleep_time; + } + + pkt_dbg(2, pd, "sleeping\n"); + residue = schedule_timeout(min_sleep_time); + pkt_dbg(2, pd, "wake up\n"); + + /* make swsusp happy with our thread */ + try_to_freeze(); + + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (!pkt->sleep_time) + continue; + pkt->sleep_time -= min_sleep_time - residue; + if (pkt->sleep_time <= 0) { + pkt->sleep_time = 0; + atomic_inc(&pkt->run_sm); + } + } + + if (kthread_should_stop()) + break; + } +work_to_do: + set_current_state(TASK_RUNNING); + remove_wait_queue(&pd->wqueue, &wait); + + if (kthread_should_stop()) + break; + + /* + * if pkt_handle_queue returns true, we can queue + * another request. + */ + while (pkt_handle_queue(pd)) + ; + + /* + * Handle packet state machine + */ + pkt_handle_packets(pd); + + /* + * Handle iosched queues + */ + pkt_iosched_process_queue(pd); + } + + return 0; +} + +static void pkt_print_settings(struct pktcdvd_device *pd) +{ + pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n", + pd->settings.fp ? "Fixed" : "Variable", + pd->settings.size >> 2, + pd->settings.block_mode == 8 ? '1' : '2'); +} + +static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + + cgc->cmd[0] = GPCMD_MODE_SENSE_10; + cgc->cmd[2] = page_code | (page_control << 6); + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_READ; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_mode_select(struct pktcdvd_device *pd, struct packet_command *cgc) +{ + memset(cgc->cmd, 0, sizeof(cgc->cmd)); + memset(cgc->buffer, 0, 2); + cgc->cmd[0] = GPCMD_MODE_SELECT_10; + cgc->cmd[1] = 0x10; /* PF */ + cgc->cmd[7] = cgc->buflen >> 8; + cgc->cmd[8] = cgc->buflen & 0xff; + cgc->data_direction = CGC_DATA_WRITE; + return pkt_generic_packet(pd, cgc); +} + +static int pkt_get_disc_info(struct pktcdvd_device *pd, disc_information *di) +{ + struct packet_command cgc; + int ret; + + /* set up command and get the disc info */ + init_cdrom_command(&cgc, di, sizeof(*di), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_DISC_INFO; + cgc.cmd[8] = cgc.buflen = 2; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + /* not all drives have the same disc_info length, so requeue + * packet with the length the drive tells us it can supply + */ + cgc.buflen = be16_to_cpu(di->disc_information_length) + + sizeof(di->disc_information_length); + + if (cgc.buflen > sizeof(disc_information)) + cgc.buflen = sizeof(disc_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static int pkt_get_track_info(struct pktcdvd_device *pd, __u16 track, __u8 type, track_information *ti) +{ + struct packet_command cgc; + int ret; + + init_cdrom_command(&cgc, ti, 8, CGC_DATA_READ); + cgc.cmd[0] = GPCMD_READ_TRACK_RZONE_INFO; + cgc.cmd[1] = type & 3; + cgc.cmd[4] = (track & 0xff00) >> 8; + cgc.cmd[5] = track & 0xff; + cgc.cmd[8] = 8; + cgc.quiet = 1; + + ret = pkt_generic_packet(pd, &cgc); + if (ret) + return ret; + + cgc.buflen = be16_to_cpu(ti->track_information_length) + + sizeof(ti->track_information_length); + + if (cgc.buflen > sizeof(track_information)) + cgc.buflen = sizeof(track_information); + + cgc.cmd[8] = cgc.buflen; + return pkt_generic_packet(pd, &cgc); +} + +static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd, + long *last_written) +{ + disc_information di; + track_information ti; + __u32 last_track; + int ret; + + ret = pkt_get_disc_info(pd, &di); + if (ret) + return ret; + + last_track = (di.last_track_msb << 8) | di.last_track_lsb; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + + /* if this track is blank, try the previous. */ + if (ti.blank) { + last_track--; + ret = pkt_get_track_info(pd, last_track, 1, &ti); + if (ret) + return ret; + } + + /* if last recorded field is valid, return it. */ + if (ti.lra_v) { + *last_written = be32_to_cpu(ti.last_rec_address); + } else { + /* make it up instead */ + *last_written = be32_to_cpu(ti.track_start) + + be32_to_cpu(ti.track_size); + if (ti.free_blocks) + *last_written -= (be32_to_cpu(ti.free_blocks) + 7); + } + return 0; +} + +/* + * write mode select package based on pd->settings + */ +static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + write_param_page *wp; + char buffer[128]; + int ret, size; + + /* doesn't apply to DVD+RW or DVD-RAM */ + if ((pd->mmc3_profile == 0x1a) || (pd->mmc3_profile == 0x12)) + return 0; + + memset(buffer, 0, sizeof(buffer)); + init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + size = 2 + ((buffer[0] << 8) | (buffer[1] & 0xff)); + pd->mode_offset = (buffer[6] << 8) | (buffer[7] & 0xff); + if (size > sizeof(buffer)) + size = sizeof(buffer); + + /* + * now get it all + */ + init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + /* + * write page is offset header + block descriptor length + */ + wp = (write_param_page *) &buffer[sizeof(struct mode_page_header) + pd->mode_offset]; + + wp->fp = pd->settings.fp; + wp->track_mode = pd->settings.track_mode; + wp->write_type = pd->settings.write_type; + wp->data_block_type = pd->settings.block_mode; + + wp->multi_session = 0; + +#ifdef PACKET_USE_LS + wp->link_size = 7; + wp->ls_v = 1; +#endif + + if (wp->data_block_type == PACKET_BLOCK_MODE1) { + wp->session_format = 0; + wp->subhdr2 = 0x20; + } else if (wp->data_block_type == PACKET_BLOCK_MODE2) { + wp->session_format = 0x20; + wp->subhdr2 = 8; +#if 0 + wp->mcn[0] = 0x80; + memcpy(&wp->mcn[1], PACKET_MCN, sizeof(wp->mcn) - 1); +#endif + } else { + /* + * paranoia + */ + pkt_err(pd, "write mode wrong %d\n", wp->data_block_type); + return 1; + } + wp->packet_size = cpu_to_be32(pd->settings.size >> 2); + + cgc.buflen = cgc.cmd[8] = size; + ret = pkt_mode_select(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + pkt_print_settings(pd); + return 0; +} + +/* + * 1 -- we can write to this track, 0 -- we can't + */ +static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti) +{ + switch (pd->mmc3_profile) { + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + /* The track is always writable on DVD+RW/DVD-RAM */ + return 1; + default: + break; + } + + if (!ti->packet || !ti->fp) + return 0; + + /* + * "good" settings as per Mt Fuji. + */ + if (ti->rt == 0 && ti->blank == 0) + return 1; + + if (ti->rt == 0 && ti->blank == 1) + return 1; + + if (ti->rt == 1 && ti->blank == 0) + return 1; + + pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet); + return 0; +} + +/* + * 1 -- we can write to this disc, 0 -- we can't + */ +static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di) +{ + switch (pd->mmc3_profile) { + case 0x0a: /* CD-RW */ + case 0xffff: /* MMC3 not supported */ + break; + case 0x1a: /* DVD+RW */ + case 0x13: /* DVD-RW */ + case 0x12: /* DVD-RAM */ + return 1; + default: + pkt_dbg(2, pd, "Wrong disc profile (%x)\n", + pd->mmc3_profile); + return 0; + } + + /* + * for disc type 0xff we should probably reserve a new track. + * but i'm not sure, should we leave this to user apps? probably. + */ + if (di->disc_type == 0xff) { + pkt_notice(pd, "unknown disc - no track?\n"); + return 0; + } + + if (di->disc_type != 0x20 && di->disc_type != 0) { + pkt_err(pd, "wrong disc type (%x)\n", di->disc_type); + return 0; + } + + if (di->erasable == 0) { + pkt_notice(pd, "disc not erasable\n"); + return 0; + } + + if (di->border_status == PACKET_SESSION_RESERVED) { + pkt_err(pd, "can't write to last track (reserved)\n"); + return 0; + } + + return 1; +} + +static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + unsigned char buf[12]; + disc_information di; + track_information ti; + int ret, track; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.cmd[0] = GPCMD_GET_CONFIGURATION; + cgc.cmd[8] = 8; + ret = pkt_generic_packet(pd, &cgc); + pd->mmc3_profile = ret ? 0xffff : buf[6] << 8 | buf[7]; + + memset(&di, 0, sizeof(disc_information)); + memset(&ti, 0, sizeof(track_information)); + + ret = pkt_get_disc_info(pd, &di); + if (ret) { + pkt_err(pd, "failed get_disc\n"); + return ret; + } + + if (!pkt_writable_disc(pd, &di)) + return -EROFS; + + pd->type = di.erasable ? PACKET_CDRW : PACKET_CDR; + + track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */ + ret = pkt_get_track_info(pd, track, 1, &ti); + if (ret) { + pkt_err(pd, "failed get_track\n"); + return ret; + } + + if (!pkt_writable_track(pd, &ti)) { + pkt_err(pd, "can't write to this track\n"); + return -EROFS; + } + + /* + * we keep packet size in 512 byte units, makes it easier to + * deal with request calculations. + */ + pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2; + if (pd->settings.size == 0) { + pkt_notice(pd, "detected zero packet size!\n"); + return -ENXIO; + } + if (pd->settings.size > PACKET_MAX_SECTORS) { + pkt_err(pd, "packet size is too big\n"); + return -EROFS; + } + pd->settings.fp = ti.fp; + pd->offset = (be32_to_cpu(ti.track_start) << 2) & (pd->settings.size - 1); + + if (ti.nwa_v) { + pd->nwa = be32_to_cpu(ti.next_writable); + set_bit(PACKET_NWA_VALID, &pd->flags); + } + + /* + * in theory we could use lra on -RW media as well and just zero + * blocks that haven't been written yet, but in practice that + * is just a no-go. we'll use that for -R, naturally. + */ + if (ti.lra_v) { + pd->lra = be32_to_cpu(ti.last_rec_address); + set_bit(PACKET_LRA_VALID, &pd->flags); + } else { + pd->lra = 0xffffffff; + set_bit(PACKET_LRA_VALID, &pd->flags); + } + + /* + * fine for now + */ + pd->settings.link_loss = 7; + pd->settings.write_type = 0; /* packet */ + pd->settings.track_mode = ti.track_mode; + + /* + * mode1 or mode2 disc + */ + switch (ti.data_mode) { + case PACKET_MODE1: + pd->settings.block_mode = PACKET_BLOCK_MODE1; + break; + case PACKET_MODE2: + pd->settings.block_mode = PACKET_BLOCK_MODE2; + break; + default: + pkt_err(pd, "unknown data mode\n"); + return -EROFS; + } + return 0; +} + +/* + * enable/disable write caching on drive + */ +static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd, + int set) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + int ret; + + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.buflen = pd->mode_offset + 12; + + /* + * caching mode page might not be there, so quiet this command + */ + cgc.quiet = 1; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_WCACHING_PAGE, 0); + if (ret) + return ret; + + buf[pd->mode_offset + 10] |= (!!set << 2); + + cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff)); + ret = pkt_mode_select(pd, &cgc); + if (ret) { + pkt_err(pd, "write caching control failed\n"); + pkt_dump_sense(pd, &cgc); + } else if (!ret && set) + pkt_notice(pd, "enabled write caching\n"); + return ret; +} + +static int pkt_lock_door(struct pktcdvd_device *pd, int lockflag) +{ + struct packet_command cgc; + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.cmd[0] = GPCMD_PREVENT_ALLOW_MEDIUM_REMOVAL; + cgc.cmd[4] = lockflag ? 1 : 0; + return pkt_generic_packet(pd, &cgc); +} + +/* + * Returns drive maximum write speed + */ +static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd, + unsigned *write_speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[256+18]; + unsigned char *cap_buf; + int ret, offset; + + cap_buf = &buf[sizeof(struct mode_page_header) + pd->mode_offset]; + init_cdrom_command(&cgc, buf, sizeof(buf), CGC_DATA_UNKNOWN); + cgc.sshdr = &sshdr; + + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + cgc.buflen = pd->mode_offset + cap_buf[1] + 2 + + sizeof(struct mode_page_header); + ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + } + + offset = 20; /* Obsoleted field, used by older drives */ + if (cap_buf[1] >= 28) + offset = 28; /* Current write speed selected */ + if (cap_buf[1] >= 30) { + /* If the drive reports at least one "Logical Unit Write + * Speed Performance Descriptor Block", use the information + * in the first block. (contains the highest speed) + */ + int num_spdb = (cap_buf[30] << 8) + cap_buf[31]; + if (num_spdb > 0) + offset = 34; + } + + *write_speed = (cap_buf[offset] << 8) | cap_buf[offset + 1]; + return 0; +} + +/* These tables from cdrecord - I don't have orange book */ +/* standard speed CD-RW (1-4x) */ +static char clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* high speed CD-RW (-10x) */ +static char hs_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 6, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +}; +/* ultra high speed CD-RW */ +static char us_clv_to_speed[16] = { + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 */ + 0, 2, 4, 8, 0, 0,16, 0,24,32,40,48, 0, 0, 0, 0 +}; + +/* + * reads the maximum media speed from ATIP + */ +static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd, + unsigned *speed) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + unsigned char buf[64]; + unsigned int size, st, sp; + int ret; + + init_cdrom_command(&cgc, buf, 2, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; /* READ ATIP */ + cgc.cmd[8] = 2; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + size = ((unsigned int) buf[0]<<8) + buf[1] + 2; + if (size > sizeof(buf)) + size = sizeof(buf); + + init_cdrom_command(&cgc, buf, size, CGC_DATA_READ); + cgc.sshdr = &sshdr; + cgc.cmd[0] = GPCMD_READ_TOC_PMA_ATIP; + cgc.cmd[1] = 2; + cgc.cmd[2] = 4; + cgc.cmd[8] = size; + ret = pkt_generic_packet(pd, &cgc); + if (ret) { + pkt_dump_sense(pd, &cgc); + return ret; + } + + if (!(buf[6] & 0x40)) { + pkt_notice(pd, "disc type is not CD-RW\n"); + return 1; + } + if (!(buf[6] & 0x4)) { + pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n"); + return 1; + } + + st = (buf[6] >> 3) & 0x7; /* disc sub-type */ + + sp = buf[16] & 0xf; /* max speed from ATIP A1 field */ + + /* Info from cdrecord */ + switch (st) { + case 0: /* standard speed */ + *speed = clv_to_speed[sp]; + break; + case 1: /* high speed */ + *speed = hs_clv_to_speed[sp]; + break; + case 2: /* ultra high speed */ + *speed = us_clv_to_speed[sp]; + break; + default: + pkt_notice(pd, "unknown disc sub-type %d\n", st); + return 1; + } + if (*speed) { + pkt_info(pd, "maximum media speed: %d\n", *speed); + return 0; + } else { + pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st); + return 1; + } +} + +static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd) +{ + struct packet_command cgc; + struct scsi_sense_hdr sshdr; + int ret; + + pkt_dbg(2, pd, "Performing OPC\n"); + + init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE); + cgc.sshdr = &sshdr; + cgc.timeout = 60*HZ; + cgc.cmd[0] = GPCMD_SEND_OPC; + cgc.cmd[1] = 1; + ret = pkt_generic_packet(pd, &cgc); + if (ret) + pkt_dump_sense(pd, &cgc); + return ret; +} + +static int pkt_open_write(struct pktcdvd_device *pd) +{ + int ret; + unsigned int write_speed, media_write_speed, read_speed; + + ret = pkt_probe_settings(pd); + if (ret) { + pkt_dbg(2, pd, "failed probe\n"); + return ret; + } + + ret = pkt_set_write_settings(pd); + if (ret) { + pkt_dbg(1, pd, "failed saving write settings\n"); + return -EIO; + } + + pkt_write_caching(pd, USE_WCACHING); + + ret = pkt_get_max_speed(pd, &write_speed); + if (ret) + write_speed = 16 * 177; + switch (pd->mmc3_profile) { + case 0x13: /* DVD-RW */ + case 0x1a: /* DVD+RW */ + case 0x12: /* DVD-RAM */ + pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed); + break; + default: + ret = pkt_media_speed(pd, &media_write_speed); + if (ret) + media_write_speed = 16; + write_speed = min(write_speed, media_write_speed * 177); + pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176); + break; + } + read_speed = write_speed; + + ret = pkt_set_speed(pd, write_speed, read_speed); + if (ret) { + pkt_dbg(1, pd, "couldn't set write speed\n"); + return -EIO; + } + pd->write_speed = write_speed; + pd->read_speed = read_speed; + + ret = pkt_perform_opc(pd); + if (ret) { + pkt_dbg(1, pd, "Optimum Power Calibration failed\n"); + } + + return 0; +} + +/* + * called at open time. + */ +static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write) +{ + int ret; + long lba; + struct request_queue *q; + struct block_device *bdev; + + /* + * We need to re-open the cdrom device without O_NONBLOCK to be able + * to read/write from/to it. It is already opened in O_NONBLOCK mode + * so open should not fail. + */ + bdev = blkdev_get_by_dev(pd->bdev->bd_dev, FMODE_READ | FMODE_EXCL, pd); + if (IS_ERR(bdev)) { + ret = PTR_ERR(bdev); + goto out; + } + + ret = pkt_get_last_written(pd, &lba); + if (ret) { + pkt_err(pd, "pkt_get_last_written failed\n"); + goto out_putdev; + } + + set_capacity(pd->disk, lba << 2); + set_capacity_and_notify(pd->bdev->bd_disk, lba << 2); + + q = bdev_get_queue(pd->bdev); + if (write) { + ret = pkt_open_write(pd); + if (ret) + goto out_putdev; + /* + * Some CDRW drives can not handle writes larger than one packet, + * even if the size is a multiple of the packet size. + */ + blk_queue_max_hw_sectors(q, pd->settings.size); + set_bit(PACKET_WRITABLE, &pd->flags); + } else { + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + clear_bit(PACKET_WRITABLE, &pd->flags); + } + + ret = pkt_set_segment_merging(pd, q); + if (ret) + goto out_putdev; + + if (write) { + if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) { + pkt_err(pd, "not enough memory for buffers\n"); + ret = -ENOMEM; + goto out_putdev; + } + pkt_info(pd, "%lukB available on disc\n", lba << 1); + } + + return 0; + +out_putdev: + blkdev_put(bdev, FMODE_READ | FMODE_EXCL); +out: + return ret; +} + +/* + * called when the device is closed. makes sure that the device flushes + * the internal cache before we close. + */ +static void pkt_release_dev(struct pktcdvd_device *pd, int flush) +{ + if (flush && pkt_flush_cache(pd)) + pkt_dbg(1, pd, "not flushing cache\n"); + + pkt_lock_door(pd, 0); + + pkt_set_speed(pd, MAX_SPEED, MAX_SPEED); + blkdev_put(pd->bdev, FMODE_READ | FMODE_EXCL); + + pkt_shrink_pktlist(pd); +} + +static struct pktcdvd_device *pkt_find_dev_from_minor(unsigned int dev_minor) +{ + if (dev_minor >= MAX_WRITERS) + return NULL; + + dev_minor = array_index_nospec(dev_minor, MAX_WRITERS); + return pkt_devs[dev_minor]; +} + +static int pkt_open(struct block_device *bdev, fmode_t mode) +{ + struct pktcdvd_device *pd = NULL; + int ret; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev)); + if (!pd) { + ret = -ENODEV; + goto out; + } + BUG_ON(pd->refcnt < 0); + + pd->refcnt++; + if (pd->refcnt > 1) { + if ((mode & FMODE_WRITE) && + !test_bit(PACKET_WRITABLE, &pd->flags)) { + ret = -EBUSY; + goto out_dec; + } + } else { + ret = pkt_open_dev(pd, mode & FMODE_WRITE); + if (ret) + goto out_dec; + /* + * needed here as well, since ext2 (among others) may change + * the blocksize at mount time + */ + set_blocksize(bdev, CD_FRAMESIZE); + } + + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return 0; + +out_dec: + pd->refcnt--; +out: + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); + return ret; +} + +static void pkt_close(struct gendisk *disk, fmode_t mode) +{ + struct pktcdvd_device *pd = disk->private_data; + + mutex_lock(&pktcdvd_mutex); + mutex_lock(&ctl_mutex); + pd->refcnt--; + BUG_ON(pd->refcnt < 0); + if (pd->refcnt == 0) { + int flush = test_bit(PACKET_WRITABLE, &pd->flags); + pkt_release_dev(pd, flush); + } + mutex_unlock(&ctl_mutex); + mutex_unlock(&pktcdvd_mutex); +} + + +static void pkt_end_io_read_cloned(struct bio *bio) +{ + struct packet_stacked_data *psd = bio->bi_private; + struct pktcdvd_device *pd = psd->pd; + + psd->bio->bi_status = bio->bi_status; + bio_put(bio); + bio_endio(psd->bio); + mempool_free(psd, &psd_pool); + pkt_bio_finished(pd); +} + +static void pkt_make_request_read(struct pktcdvd_device *pd, struct bio *bio) +{ + struct bio *cloned_bio = + bio_alloc_clone(pd->bdev, bio, GFP_NOIO, &pkt_bio_set); + struct packet_stacked_data *psd = mempool_alloc(&psd_pool, GFP_NOIO); + + psd->pd = pd; + psd->bio = bio; + cloned_bio->bi_private = psd; + cloned_bio->bi_end_io = pkt_end_io_read_cloned; + pd->stats.secs_r += bio_sectors(bio); + pkt_queue_bio(pd, cloned_bio); +} + +static void pkt_make_request_write(struct request_queue *q, struct bio *bio) +{ + struct pktcdvd_device *pd = q->queuedata; + sector_t zone; + struct packet_data *pkt; + int was_empty, blocked_bio; + struct pkt_rb_node *node; + + zone = get_zone(bio->bi_iter.bi_sector, pd); + + /* + * If we find a matching packet in state WAITING or READ_WAIT, we can + * just append this bio to that packet. + */ + spin_lock(&pd->cdrw.active_list_lock); + blocked_bio = 0; + list_for_each_entry(pkt, &pd->cdrw.pkt_active_list, list) { + if (pkt->sector == zone) { + spin_lock(&pkt->lock); + if ((pkt->state == PACKET_WAITING_STATE) || + (pkt->state == PACKET_READ_WAIT_STATE)) { + bio_list_add(&pkt->orig_bios, bio); + pkt->write_size += + bio->bi_iter.bi_size / CD_FRAMESIZE; + if ((pkt->write_size >= pkt->frames) && + (pkt->state == PACKET_WAITING_STATE)) { + atomic_inc(&pkt->run_sm); + wake_up(&pd->wqueue); + } + spin_unlock(&pkt->lock); + spin_unlock(&pd->cdrw.active_list_lock); + return; + } else { + blocked_bio = 1; + } + spin_unlock(&pkt->lock); + } + } + spin_unlock(&pd->cdrw.active_list_lock); + + /* + * Test if there is enough room left in the bio work queue + * (queue size >= congestion on mark). + * If not, wait till the work queue size is below the congestion off mark. + */ + spin_lock(&pd->lock); + if (pd->write_congestion_on > 0 + && pd->bio_queue_size >= pd->write_congestion_on) { + struct wait_bit_queue_entry wqe; + + init_wait_var_entry(&wqe, &pd->congested, 0); + for (;;) { + prepare_to_wait_event(__var_waitqueue(&pd->congested), + &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); + if (pd->bio_queue_size <= pd->write_congestion_off) + break; + pd->congested = true; + spin_unlock(&pd->lock); + schedule(); + spin_lock(&pd->lock); + } + } + spin_unlock(&pd->lock); + + /* + * No matching packet found. Store the bio in the work queue. + */ + node = mempool_alloc(&pd->rb_pool, GFP_NOIO); + node->bio = bio; + spin_lock(&pd->lock); + BUG_ON(pd->bio_queue_size < 0); + was_empty = (pd->bio_queue_size == 0); + pkt_rbtree_insert(pd, node); + spin_unlock(&pd->lock); + + /* + * Wake up the worker thread. + */ + atomic_set(&pd->scan_queue, 1); + if (was_empty) { + /* This wake_up is required for correct operation */ + wake_up(&pd->wqueue); + } else if (!list_empty(&pd->cdrw.pkt_free_list) && !blocked_bio) { + /* + * This wake up is not required for correct operation, + * but improves performance in some cases. + */ + wake_up(&pd->wqueue); + } +} + +static void pkt_submit_bio(struct bio *bio) +{ + struct pktcdvd_device *pd = bio->bi_bdev->bd_disk->queue->queuedata; + struct bio *split; + + bio = bio_split_to_limits(bio); + + pkt_dbg(2, pd, "start = %6llx stop = %6llx\n", + (unsigned long long)bio->bi_iter.bi_sector, + (unsigned long long)bio_end_sector(bio)); + + /* + * Clone READ bios so we can have our own bi_end_io callback. + */ + if (bio_data_dir(bio) == READ) { + pkt_make_request_read(pd, bio); + return; + } + + if (!test_bit(PACKET_WRITABLE, &pd->flags)) { + pkt_notice(pd, "WRITE for ro device (%llu)\n", + (unsigned long long)bio->bi_iter.bi_sector); + goto end_io; + } + + if (!bio->bi_iter.bi_size || (bio->bi_iter.bi_size % CD_FRAMESIZE)) { + pkt_err(pd, "wrong bio size\n"); + goto end_io; + } + + do { + sector_t zone = get_zone(bio->bi_iter.bi_sector, pd); + sector_t last_zone = get_zone(bio_end_sector(bio) - 1, pd); + + if (last_zone != zone) { + BUG_ON(last_zone != zone + pd->settings.size); + + split = bio_split(bio, last_zone - + bio->bi_iter.bi_sector, + GFP_NOIO, &pkt_bio_set); + bio_chain(split, bio); + } else { + split = bio; + } + + pkt_make_request_write(bio->bi_bdev->bd_disk->queue, split); + } while (split != bio); + + return; +end_io: + bio_io_error(bio); +} + +static void pkt_init_queue(struct pktcdvd_device *pd) +{ + struct request_queue *q = pd->disk->queue; + + blk_queue_logical_block_size(q, CD_FRAMESIZE); + blk_queue_max_hw_sectors(q, PACKET_MAX_SECTORS); + q->queuedata = pd; +} + +static int pkt_seq_show(struct seq_file *m, void *p) +{ + struct pktcdvd_device *pd = m->private; + char *msg; + int states[PACKET_NUM_STATES]; + + seq_printf(m, "Writer %s mapped to %pg:\n", pd->name, pd->bdev); + + seq_printf(m, "\nSettings:\n"); + seq_printf(m, "\tpacket size:\t\t%dkB\n", pd->settings.size / 2); + + if (pd->settings.write_type == 0) + msg = "Packet"; + else + msg = "Unknown"; + seq_printf(m, "\twrite type:\t\t%s\n", msg); + + seq_printf(m, "\tpacket type:\t\t%s\n", pd->settings.fp ? "Fixed" : "Variable"); + seq_printf(m, "\tlink loss:\t\t%d\n", pd->settings.link_loss); + + seq_printf(m, "\ttrack mode:\t\t%d\n", pd->settings.track_mode); + + if (pd->settings.block_mode == PACKET_BLOCK_MODE1) + msg = "Mode 1"; + else if (pd->settings.block_mode == PACKET_BLOCK_MODE2) + msg = "Mode 2"; + else + msg = "Unknown"; + seq_printf(m, "\tblock mode:\t\t%s\n", msg); + + seq_printf(m, "\nStatistics:\n"); + seq_printf(m, "\tpackets started:\t%lu\n", pd->stats.pkt_started); + seq_printf(m, "\tpackets ended:\t\t%lu\n", pd->stats.pkt_ended); + seq_printf(m, "\twritten:\t\t%lukB\n", pd->stats.secs_w >> 1); + seq_printf(m, "\tread gather:\t\t%lukB\n", pd->stats.secs_rg >> 1); + seq_printf(m, "\tread:\t\t\t%lukB\n", pd->stats.secs_r >> 1); + + seq_printf(m, "\nMisc:\n"); + seq_printf(m, "\treference count:\t%d\n", pd->refcnt); + seq_printf(m, "\tflags:\t\t\t0x%lx\n", pd->flags); + seq_printf(m, "\tread speed:\t\t%ukB/s\n", pd->read_speed); + seq_printf(m, "\twrite speed:\t\t%ukB/s\n", pd->write_speed); + seq_printf(m, "\tstart offset:\t\t%lu\n", pd->offset); + seq_printf(m, "\tmode page offset:\t%u\n", pd->mode_offset); + + seq_printf(m, "\nQueue state:\n"); + seq_printf(m, "\tbios queued:\t\t%d\n", pd->bio_queue_size); + seq_printf(m, "\tbios pending:\t\t%d\n", atomic_read(&pd->cdrw.pending_bios)); + seq_printf(m, "\tcurrent sector:\t\t0x%llx\n", (unsigned long long)pd->current_sector); + + pkt_count_states(pd, states); + seq_printf(m, "\tstate:\t\t\ti:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n", + states[0], states[1], states[2], states[3], states[4], states[5]); + + seq_printf(m, "\twrite congestion marks:\toff=%d on=%d\n", + pd->write_congestion_off, + pd->write_congestion_on); + return 0; +} + +static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev) +{ + int i; + struct block_device *bdev; + struct scsi_device *sdev; + + if (pd->pkt_dev == dev) { + pkt_err(pd, "recursive setup not allowed\n"); + return -EBUSY; + } + for (i = 0; i < MAX_WRITERS; i++) { + struct pktcdvd_device *pd2 = pkt_devs[i]; + if (!pd2) + continue; + if (pd2->bdev->bd_dev == dev) { + pkt_err(pd, "%pg already setup\n", pd2->bdev); + return -EBUSY; + } + if (pd2->pkt_dev == dev) { + pkt_err(pd, "can't chain pktcdvd devices\n"); + return -EBUSY; + } + } + + bdev = blkdev_get_by_dev(dev, FMODE_READ | FMODE_NDELAY, NULL); + if (IS_ERR(bdev)) + return PTR_ERR(bdev); + sdev = scsi_device_from_queue(bdev->bd_disk->queue); + if (!sdev) { + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + return -EINVAL; + } + put_device(&sdev->sdev_gendev); + + /* This is safe, since we have a reference from open(). */ + __module_get(THIS_MODULE); + + pd->bdev = bdev; + set_blocksize(bdev, CD_FRAMESIZE); + + pkt_init_queue(pd); + + atomic_set(&pd->cdrw.pending_bios, 0); + pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name); + if (IS_ERR(pd->cdrw.thread)) { + pkt_err(pd, "can't start kernel thread\n"); + goto out_mem; + } + + proc_create_single_data(pd->name, 0, pkt_proc, pkt_seq_show, pd); + pkt_dbg(1, pd, "writer mapped to %pg\n", bdev); + return 0; + +out_mem: + blkdev_put(bdev, FMODE_READ | FMODE_NDELAY); + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + return -ENOMEM; +} + +static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) +{ + struct pktcdvd_device *pd = bdev->bd_disk->private_data; + int ret; + + pkt_dbg(2, pd, "cmd %x, dev %d:%d\n", + cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev)); + + mutex_lock(&pktcdvd_mutex); + switch (cmd) { + case CDROMEJECT: + /* + * The door gets locked when the device is opened, so we + * have to unlock it or else the eject command fails. + */ + if (pd->refcnt == 1) + pkt_lock_door(pd, 0); + fallthrough; + /* + * forward selected CDROM ioctls to CD-ROM, for UDF + */ + case CDROMMULTISESSION: + case CDROMREADTOCENTRY: + case CDROM_LAST_WRITTEN: + case CDROM_SEND_PACKET: + case SCSI_IOCTL_SEND_COMMAND: + if (!bdev->bd_disk->fops->ioctl) + ret = -ENOTTY; + else + ret = bdev->bd_disk->fops->ioctl(bdev, mode, cmd, arg); + break; + default: + pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd); + ret = -ENOTTY; + } + mutex_unlock(&pktcdvd_mutex); + + return ret; +} + +static unsigned int pkt_check_events(struct gendisk *disk, + unsigned int clearing) +{ + struct pktcdvd_device *pd = disk->private_data; + struct gendisk *attached_disk; + + if (!pd) + return 0; + if (!pd->bdev) + return 0; + attached_disk = pd->bdev->bd_disk; + if (!attached_disk || !attached_disk->fops->check_events) + return 0; + return attached_disk->fops->check_events(attached_disk, clearing); +} + +static char *pkt_devnode(struct gendisk *disk, umode_t *mode) +{ + return kasprintf(GFP_KERNEL, "pktcdvd/%s", disk->disk_name); +} + +static const struct block_device_operations pktcdvd_ops = { + .owner = THIS_MODULE, + .submit_bio = pkt_submit_bio, + .open = pkt_open, + .release = pkt_close, + .ioctl = pkt_ioctl, + .compat_ioctl = blkdev_compat_ptr_ioctl, + .check_events = pkt_check_events, + .devnode = pkt_devnode, +}; + +/* + * Set up mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev) +{ + int idx; + int ret = -ENOMEM; + struct pktcdvd_device *pd; + struct gendisk *disk; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) + if (!pkt_devs[idx]) + break; + if (idx == MAX_WRITERS) { + pr_err("max %d writers supported\n", MAX_WRITERS); + ret = -EBUSY; + goto out_mutex; + } + + pd = kzalloc(sizeof(struct pktcdvd_device), GFP_KERNEL); + if (!pd) + goto out_mutex; + + ret = mempool_init_kmalloc_pool(&pd->rb_pool, PKT_RB_POOL_SIZE, + sizeof(struct pkt_rb_node)); + if (ret) + goto out_mem; + + INIT_LIST_HEAD(&pd->cdrw.pkt_free_list); + INIT_LIST_HEAD(&pd->cdrw.pkt_active_list); + spin_lock_init(&pd->cdrw.active_list_lock); + + spin_lock_init(&pd->lock); + spin_lock_init(&pd->iosched.lock); + bio_list_init(&pd->iosched.read_queue); + bio_list_init(&pd->iosched.write_queue); + sprintf(pd->name, DRIVER_NAME"%d", idx); + init_waitqueue_head(&pd->wqueue); + pd->bio_queue = RB_ROOT; + + pd->write_congestion_on = write_congestion_on; + pd->write_congestion_off = write_congestion_off; + + ret = -ENOMEM; + disk = blk_alloc_disk(NUMA_NO_NODE); + if (!disk) + goto out_mem; + pd->disk = disk; + disk->major = pktdev_major; + disk->first_minor = idx; + disk->minors = 1; + disk->fops = &pktcdvd_ops; + disk->flags = GENHD_FL_REMOVABLE | GENHD_FL_NO_PART; + strcpy(disk->disk_name, pd->name); + disk->private_data = pd; + + pd->pkt_dev = MKDEV(pktdev_major, idx); + ret = pkt_new_dev(pd, dev); + if (ret) + goto out_mem2; + + /* inherit events of the host device */ + disk->events = pd->bdev->bd_disk->events; + + ret = add_disk(disk); + if (ret) + goto out_mem2; + + pkt_sysfs_dev_new(pd); + pkt_debugfs_dev_new(pd); + + pkt_devs[idx] = pd; + if (pkt_dev) + *pkt_dev = pd->pkt_dev; + + mutex_unlock(&ctl_mutex); + return 0; + +out_mem2: + put_disk(disk); +out_mem: + mempool_exit(&pd->rb_pool); + kfree(pd); +out_mutex: + mutex_unlock(&ctl_mutex); + pr_err("setup of pktcdvd device failed\n"); + return ret; +} + +/* + * Tear down mapping from pktcdvd device to CD-ROM device. + */ +static int pkt_remove_dev(dev_t pkt_dev) +{ + struct pktcdvd_device *pd; + int idx; + int ret = 0; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + for (idx = 0; idx < MAX_WRITERS; idx++) { + pd = pkt_devs[idx]; + if (pd && (pd->pkt_dev == pkt_dev)) + break; + } + if (idx == MAX_WRITERS) { + pr_debug("dev not setup\n"); + ret = -ENXIO; + goto out; + } + + if (pd->refcnt > 0) { + ret = -EBUSY; + goto out; + } + if (!IS_ERR(pd->cdrw.thread)) + kthread_stop(pd->cdrw.thread); + + pkt_devs[idx] = NULL; + + pkt_debugfs_dev_remove(pd); + pkt_sysfs_dev_remove(pd); + + blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY); + + remove_proc_entry(pd->name, pkt_proc); + pkt_dbg(1, pd, "writer unmapped\n"); + + del_gendisk(pd->disk); + put_disk(pd->disk); + + mempool_exit(&pd->rb_pool); + kfree(pd); + + /* This is safe: open() is still holding a reference. */ + module_put(THIS_MODULE); + +out: + mutex_unlock(&ctl_mutex); + return ret; +} + +static void pkt_get_status(struct pkt_ctrl_command *ctrl_cmd) +{ + struct pktcdvd_device *pd; + + mutex_lock_nested(&ctl_mutex, SINGLE_DEPTH_NESTING); + + pd = pkt_find_dev_from_minor(ctrl_cmd->dev_index); + if (pd) { + ctrl_cmd->dev = new_encode_dev(pd->bdev->bd_dev); + ctrl_cmd->pkt_dev = new_encode_dev(pd->pkt_dev); + } else { + ctrl_cmd->dev = 0; + ctrl_cmd->pkt_dev = 0; + } + ctrl_cmd->num_devices = MAX_WRITERS; + + mutex_unlock(&ctl_mutex); +} + +static long pkt_ctl_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct pkt_ctrl_command ctrl_cmd; + int ret = 0; + dev_t pkt_dev = 0; + + if (cmd != PACKET_CTRL_CMD) + return -ENOTTY; + + if (copy_from_user(&ctrl_cmd, argp, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + + switch (ctrl_cmd.command) { + case PKT_CTRL_CMD_SETUP: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = pkt_setup_dev(new_decode_dev(ctrl_cmd.dev), &pkt_dev); + ctrl_cmd.pkt_dev = new_encode_dev(pkt_dev); + break; + case PKT_CTRL_CMD_TEARDOWN: + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + ret = pkt_remove_dev(new_decode_dev(ctrl_cmd.pkt_dev)); + break; + case PKT_CTRL_CMD_STATUS: + pkt_get_status(&ctrl_cmd); + break; + default: + return -ENOTTY; + } + + if (copy_to_user(argp, &ctrl_cmd, sizeof(struct pkt_ctrl_command))) + return -EFAULT; + return ret; +} + +#ifdef CONFIG_COMPAT +static long pkt_ctl_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + return pkt_ctl_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations pkt_ctl_fops = { + .open = nonseekable_open, + .unlocked_ioctl = pkt_ctl_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = pkt_ctl_compat_ioctl, +#endif + .owner = THIS_MODULE, + .llseek = no_llseek, +}; + +static struct miscdevice pkt_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = DRIVER_NAME, + .nodename = "pktcdvd/control", + .fops = &pkt_ctl_fops +}; + +static int __init pkt_init(void) +{ + int ret; + + mutex_init(&ctl_mutex); + + ret = mempool_init_kmalloc_pool(&psd_pool, PSD_POOL_SIZE, + sizeof(struct packet_stacked_data)); + if (ret) + return ret; + ret = bioset_init(&pkt_bio_set, BIO_POOL_SIZE, 0, 0); + if (ret) { + mempool_exit(&psd_pool); + return ret; + } + + ret = register_blkdev(pktdev_major, DRIVER_NAME); + if (ret < 0) { + pr_err("unable to register block device\n"); + goto out2; + } + if (!pktdev_major) + pktdev_major = ret; + + ret = pkt_sysfs_init(); + if (ret) + goto out; + + pkt_debugfs_init(); + + ret = misc_register(&pkt_misc); + if (ret) { + pr_err("unable to register misc device\n"); + goto out_misc; + } + + pkt_proc = proc_mkdir("driver/"DRIVER_NAME, NULL); + + return 0; + +out_misc: + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); +out: + unregister_blkdev(pktdev_major, DRIVER_NAME); +out2: + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); + return ret; +} + +static void __exit pkt_exit(void) +{ + remove_proc_entry("driver/"DRIVER_NAME, NULL); + misc_deregister(&pkt_misc); + + pkt_debugfs_cleanup(); + pkt_sysfs_cleanup(); + + unregister_blkdev(pktdev_major, DRIVER_NAME); + mempool_exit(&psd_pool); + bioset_exit(&pkt_bio_set); +} + +MODULE_DESCRIPTION("Packet writing layer for CD/DVD drives"); +MODULE_AUTHOR("Jens Axboe "); +MODULE_LICENSE("GPL"); + +module_init(pkt_init); +module_exit(pkt_exit); diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h new file mode 100644 index 000000000000..f9c5ac80d59b --- /dev/null +++ b/include/linux/pktcdvd.h @@ -0,0 +1,197 @@ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. + * + */ +#ifndef __PKTCDVD_H +#define __PKTCDVD_H + +#include +#include +#include +#include +#include +#include +#include + +/* default bio write queue congestion marks */ +#define PKT_WRITE_CONGESTION_ON 10000 +#define PKT_WRITE_CONGESTION_OFF 9000 + + +struct packet_settings +{ + __u32 size; /* packet size in (512 byte) sectors */ + __u8 fp; /* fixed packets */ + __u8 link_loss; /* the rest is specified + * as per Mt Fuji */ + __u8 write_type; + __u8 track_mode; + __u8 block_mode; +}; + +/* + * Very crude stats for now + */ +struct packet_stats +{ + unsigned long pkt_started; + unsigned long pkt_ended; + unsigned long secs_w; + unsigned long secs_rg; + unsigned long secs_r; +}; + +struct packet_cdrw +{ + struct list_head pkt_free_list; + struct list_head pkt_active_list; + spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ + struct task_struct *thread; + atomic_t pending_bios; +}; + +/* + * Switch to high speed reading after reading this many kilobytes + * with no interspersed writes. + */ +#define HI_SPEED_SWITCH 512 + +struct packet_iosched +{ + atomic_t attention; /* Set to non-zero when queue processing is needed */ + int writing; /* Non-zero when writing, zero when reading */ + spinlock_t lock; /* Protecting read/write queue manipulations */ + struct bio_list read_queue; + struct bio_list write_queue; + sector_t last_write; /* The sector where the last write ended */ + int successive_reads; +}; + +/* + * 32 buffers of 2048 bytes + */ +#if (PAGE_SIZE % CD_FRAMESIZE) != 0 +#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" +#endif +#define PACKET_MAX_SIZE 128 +#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) +#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) + +enum packet_data_state { + PACKET_IDLE_STATE, /* Not used at the moment */ + PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ + /* we don't have to do as much */ + /* data gathering */ + PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ + PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ + PACKET_RECOVERY_STATE, /* Recover after read/write errors */ + PACKET_FINISHED_STATE, /* After write has finished */ + + PACKET_NUM_STATES /* Number of possible states */ +}; + +/* + * Information needed for writing a single packet + */ +struct pktcdvd_device; + +struct packet_data +{ + struct list_head list; + + spinlock_t lock; /* Lock protecting state transitions and */ + /* orig_bios list */ + + struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ + /* that will be handled by this packet */ + int write_size; /* Total size of all bios in the orig_bios */ + /* list, measured in number of frames */ + + struct bio *w_bio; /* The bio we will send to the real CD */ + /* device once we have all data for the */ + /* packet we are going to write */ + sector_t sector; /* First sector in this packet */ + int frames; /* Number of frames in this packet */ + + enum packet_data_state state; /* Current state */ + atomic_t run_sm; /* Incremented whenever the state */ + /* machine needs to be run */ + long sleep_time; /* Set this to non-zero to make the state */ + /* machine run after this many jiffies. */ + + atomic_t io_wait; /* Number of pending IO operations */ + atomic_t io_errors; /* Number of read/write errors during IO */ + + struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ + struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; + + int cache_valid; /* If non-zero, the data for the zone defined */ + /* by the sector variable is completely cached */ + /* in the pages[] vector. */ + + int id; /* ID number for debugging */ + struct pktcdvd_device *pd; +}; + +struct pkt_rb_node { + struct rb_node rb_node; + struct bio *bio; +}; + +struct packet_stacked_data +{ + struct bio *bio; /* Original read request bio */ + struct pktcdvd_device *pd; +}; +#define PSD_POOL_SIZE 64 + +struct pktcdvd_device +{ + struct block_device *bdev; /* dev attached */ + dev_t pkt_dev; /* our dev */ + char name[20]; + struct packet_settings settings; + struct packet_stats stats; + int refcnt; /* Open count */ + int write_speed; /* current write speed, kB/s */ + int read_speed; /* current read speed, kB/s */ + unsigned long offset; /* start offset */ + __u8 mode_offset; /* 0 / 8 */ + __u8 type; + unsigned long flags; + __u16 mmc3_profile; + __u32 nwa; /* next writable address */ + __u32 lra; /* last recorded address */ + struct packet_cdrw cdrw; + wait_queue_head_t wqueue; + + spinlock_t lock; /* Serialize access to bio_queue */ + struct rb_root bio_queue; /* Work queue of bios we need to handle */ + int bio_queue_size; /* Number of nodes in bio_queue */ + bool congested; /* Someone is waiting for bio_queue_size + * to drop. */ + sector_t current_sector; /* Keep track of where the elevator is */ + atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ + /* needs to be run. */ + mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ + + struct packet_iosched iosched; + struct gendisk *disk; + + int write_congestion_off; + int write_congestion_on; + + struct device *dev; /* sysfs pktcdvd[0-7] dev */ + + struct dentry *dfs_d_root; /* debugfs: devname directory */ + struct dentry *dfs_f_info; /* debugfs: info file */ +}; + +#endif /* __PKTCDVD_H */ diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h new file mode 100644 index 000000000000..9cbb55d21c94 --- /dev/null +++ b/include/uapi/linux/pktcdvd.h @@ -0,0 +1,112 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2000 Jens Axboe + * Copyright (C) 2001-2004 Peter Osterlund + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and + * DVD-RW devices. + * + */ +#ifndef _UAPI__PKTCDVD_H +#define _UAPI__PKTCDVD_H + +#include + +/* + * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. + */ +#define PACKET_DEBUG 1 + +#define MAX_WRITERS 8 + +#define PKT_RB_POOL_SIZE 512 + +/* + * How long we should hold a non-full packet before starting data gathering. + */ +#define PACKET_WAIT_TIME (HZ * 5 / 1000) + +/* + * use drive write caching -- we need deferred error handling to be + * able to successfully recover with this option (drive will return good + * status as soon as the cdb is validated). + */ +#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) +#define USE_WCACHING 1 +#else +#define USE_WCACHING 0 +#endif + +/* + * No user-servicable parts beyond this point -> + */ + +/* + * device types + */ +#define PACKET_CDR 1 +#define PACKET_CDRW 2 +#define PACKET_DVDR 3 +#define PACKET_DVDRW 4 + +/* + * flags + */ +#define PACKET_WRITABLE 1 /* pd is writable */ +#define PACKET_NWA_VALID 2 /* next writable address valid */ +#define PACKET_LRA_VALID 3 /* last recorded address valid */ +#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */ + /* underlying cdrom device happy */ + +/* + * Disc status -- from READ_DISC_INFO + */ +#define PACKET_DISC_EMPTY 0 +#define PACKET_DISC_INCOMPLETE 1 +#define PACKET_DISC_COMPLETE 2 +#define PACKET_DISC_OTHER 3 + +/* + * write type, and corresponding data block type + */ +#define PACKET_MODE1 1 +#define PACKET_MODE2 2 +#define PACKET_BLOCK_MODE1 8 +#define PACKET_BLOCK_MODE2 10 + +/* + * Last session/border status + */ +#define PACKET_SESSION_EMPTY 0 +#define PACKET_SESSION_INCOMPLETE 1 +#define PACKET_SESSION_RESERVED 2 +#define PACKET_SESSION_COMPLETE 3 + +#define PACKET_MCN "4a656e734178626f65323030300000" + +#undef PACKET_USE_LS + +#define PKT_CTRL_CMD_SETUP 0 +#define PKT_CTRL_CMD_TEARDOWN 1 +#define PKT_CTRL_CMD_STATUS 2 + +struct pkt_ctrl_command { + __u32 command; /* in: Setup, teardown, status */ + __u32 dev_index; /* in/out: Device index */ + __u32 dev; /* in/out: Device nr for cdrw device */ + __u32 pkt_dev; /* in/out: Device nr for packet device */ + __u32 num_devices; /* out: Largest device index + 1 */ + __u32 padding; /* Not used */ +}; + +/* + * packet ioctls + */ +#define PACKET_IOCTL_MAGIC ('X') +#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command) + + +#endif /* _UAPI__PKTCDVD_H */ -- cgit v1.3.1 From f3dc61cde80d48751999c4cb46daf3b2185e6895 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 25 Nov 2022 10:18:26 +0000 Subject: firmware/psci: Fix MEM_PROTECT_RANGE function numbers PSCI v1.1 offers 32-bit and 64-bit variants of the MEM_PROTECT_RANGE call using function identifier 20. Fix the incorrect definitions of the MEM_PROTECT_CHECK_RANGE calls in the PSCI UAPI header. Cc: Dmitry Baryshkov Cc: Lorenzo Pieralisi Cc: Arnd Bergmann Fixes: 3137f2e60098 ("firmware/psci: Add debugfs support to ease debugging") Acked-by: Marc Zyngier Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20221125101826.22404-1-will@kernel.org Signed-off-by: Will Deacon --- include/uapi/linux/psci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/psci.h b/include/uapi/linux/psci.h index 3511095c2702..42a40ad3fb62 100644 --- a/include/uapi/linux/psci.h +++ b/include/uapi/linux/psci.h @@ -58,7 +58,7 @@ #define PSCI_1_1_FN_SYSTEM_RESET2 PSCI_0_2_FN(18) #define PSCI_1_1_FN_MEM_PROTECT PSCI_0_2_FN(19) -#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(19) +#define PSCI_1_1_FN_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN(20) #define PSCI_1_0_FN64_CPU_DEFAULT_SUSPEND PSCI_0_2_FN64(12) #define PSCI_1_0_FN64_NODE_HW_STATE PSCI_0_2_FN64(13) @@ -67,7 +67,7 @@ #define PSCI_1_0_FN64_STAT_COUNT PSCI_0_2_FN64(17) #define PSCI_1_1_FN64_SYSTEM_RESET2 PSCI_0_2_FN64(18) -#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(19) +#define PSCI_1_1_FN64_MEM_PROTECT_CHECK_RANGE PSCI_0_2_FN64(20) /* PSCI v0.2 power state encoding for CPU_SUSPEND function */ #define PSCI_0_2_POWER_STATE_ID_MASK 0xffff -- cgit v1.3.1 From 8d0e3fc61abdf56a403b316e03fa8f89a3108caa Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 21 Dec 2022 11:24:42 +0200 Subject: media: Add 2-10-10-10 RGB formats Add RGBX1010102, RGBA1010102 and ARGB2101010 formats. Signed-off-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Acked-by: Mauro Carvalho Chehab Acked-by: Hans Verkuil Signed-off-by: Laurent Pinchart --- .../userspace-api/media/v4l/pixfmt-rgb.rst | 194 +++++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 3 + include/uapi/linux/videodev2.h | 3 + 3 files changed, 200 insertions(+) (limited to 'include/uapi') diff --git a/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst b/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst index 30f51cd33f99..d330aeb4d3eb 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-rgb.rst @@ -763,6 +763,200 @@ nomenclature that instead use the order of components as seen in a 24- or \normalsize +10 Bits Per Component +===================== + +These formats store a 30-bit RGB triplet with an optional 2 bit alpha in four +bytes. They are named based on the order of the RGB components as seen in a +32-bit word, which is then stored in memory in little endian byte order +(unless otherwise noted by the presence of bit 31 in the 4CC value), and on the +number of bits for each component. + +.. raw:: latex + + \begingroup + \tiny + \setlength{\tabcolsep}{2pt} + +.. tabularcolumns:: |p{2.8cm}|p{2.0cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}|p{0.22cm}| + + +.. flat-table:: RGB Formats 10 Bits Per Color Component + :header-rows: 2 + :stub-columns: 0 + + * - Identifier + - Code + - :cspan:`7` Byte 0 in memory + - :cspan:`7` Byte 1 + - :cspan:`7` Byte 2 + - :cspan:`7` Byte 3 + * - + - + - 7 + - 6 + - 5 + - 4 + - 3 + - 2 + - 1 + - 0 + + - 7 + - 6 + - 5 + - 4 + - 3 + - 2 + - 1 + - 0 + + - 7 + - 6 + - 5 + - 4 + - 3 + - 2 + - 1 + - 0 + + - 7 + - 6 + - 5 + - 4 + - 3 + - 2 + - 1 + - 0 + * .. _V4L2-PIX-FMT-RGBX1010102: + + - ``V4L2_PIX_FMT_RGBX1010102`` + - 'RX30' + + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` + - x + - x + + - g\ :sub:`3` + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + - b\ :sub:`9` + - b\ :sub:`8` + - b\ :sub:`7` + - b\ :sub:`6` + + - r\ :sub:`1` + - r\ :sub:`0` + - g\ :sub:`9` + - g\ :sub:`8` + - g\ :sub:`7` + - g\ :sub:`6` + - g\ :sub:`5` + - g\ :sub:`4` + + - r\ :sub:`9` + - r\ :sub:`8` + - r\ :sub:`7` + - r\ :sub:`6` + - r\ :sub:`5` + - r\ :sub:`4` + - r\ :sub:`3` + - r\ :sub:`2` + - + * .. _V4L2-PIX-FMT-RGBA1010102: + + - ``V4L2_PIX_FMT_RGBA1010102`` + - 'RA30' + + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` + - a\ :sub:`1` + - a\ :sub:`0` + + - g\ :sub:`3` + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + - b\ :sub:`9` + - b\ :sub:`8` + - b\ :sub:`7` + - b\ :sub:`6` + + - r\ :sub:`1` + - r\ :sub:`0` + - g\ :sub:`9` + - g\ :sub:`8` + - g\ :sub:`7` + - g\ :sub:`6` + - g\ :sub:`5` + - g\ :sub:`4` + + - r\ :sub:`9` + - r\ :sub:`8` + - r\ :sub:`7` + - r\ :sub:`6` + - r\ :sub:`5` + - r\ :sub:`4` + - r\ :sub:`3` + - r\ :sub:`2` + - + * .. _V4L2-PIX-FMT-ARGB2101010: + + - ``V4L2_PIX_FMT_ARGB2101010`` + - 'AR30' + + - b\ :sub:`7` + - b\ :sub:`6` + - b\ :sub:`5` + - b\ :sub:`4` + - b\ :sub:`3` + - b\ :sub:`2` + - b\ :sub:`1` + - b\ :sub:`0` + + - g\ :sub:`5` + - g\ :sub:`4` + - g\ :sub:`3` + - g\ :sub:`2` + - g\ :sub:`1` + - g\ :sub:`0` + - b\ :sub:`9` + - b\ :sub:`8` + + - r\ :sub:`3` + - r\ :sub:`2` + - r\ :sub:`1` + - r\ :sub:`0` + - g\ :sub:`9` + - g\ :sub:`8` + - g\ :sub:`7` + - g\ :sub:`6` + + - a\ :sub:`1` + - a\ :sub:`0` + - r\ :sub:`9` + - r\ :sub:`8` + - r\ :sub:`7` + - r\ :sub:`6` + - r\ :sub:`5` + - r\ :sub:`4` + - + +.. raw:: latex + + \endgroup + + Deprecated RGB Formats ====================== diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 8e0a0ff62a70..b58615bab790 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1298,6 +1298,9 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_BGRX32: descr = "32-bit XBGR 8-8-8-8"; break; case V4L2_PIX_FMT_RGBA32: descr = "32-bit RGBA 8-8-8-8"; break; case V4L2_PIX_FMT_RGBX32: descr = "32-bit RGBX 8-8-8-8"; break; + case V4L2_PIX_FMT_RGBX1010102: descr = "32-bit RGBX 10-10-10-2"; break; + case V4L2_PIX_FMT_RGBA1010102: descr = "32-bit RGBA 10-10-10-2"; break; + case V4L2_PIX_FMT_ARGB2101010: descr = "32-bit ARGB 2-10-10-10"; break; case V4L2_PIX_FMT_GREY: descr = "8-bit Greyscale"; break; case V4L2_PIX_FMT_Y4: descr = "4-bit Greyscale"; break; case V4L2_PIX_FMT_Y6: descr = "6-bit Greyscale"; break; diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 1befd181a4cc..a573aea5acbd 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -576,6 +576,9 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_RGBX32 v4l2_fourcc('X', 'B', '2', '4') /* 32 RGBX-8-8-8-8 */ #define V4L2_PIX_FMT_ARGB32 v4l2_fourcc('B', 'A', '2', '4') /* 32 ARGB-8-8-8-8 */ #define V4L2_PIX_FMT_XRGB32 v4l2_fourcc('B', 'X', '2', '4') /* 32 XRGB-8-8-8-8 */ +#define V4L2_PIX_FMT_RGBX1010102 v4l2_fourcc('R', 'X', '3', '0') /* 32 RGBX-10-10-10-2 */ +#define V4L2_PIX_FMT_RGBA1010102 v4l2_fourcc('R', 'A', '3', '0') /* 32 RGBA-10-10-10-2 */ +#define V4L2_PIX_FMT_ARGB2101010 v4l2_fourcc('A', 'R', '3', '0') /* 32 ARGB-2-10-10-10 */ /* Grey formats */ #define V4L2_PIX_FMT_GREY v4l2_fourcc('G', 'R', 'E', 'Y') /* 8 Greyscale */ -- cgit v1.3.1 From 0dc1d7a79a8d13e316d3b168e9fc57e376099c7a Mon Sep 17 00:00:00 2001 From: Tomi Valkeinen Date: Wed, 21 Dec 2022 11:24:43 +0200 Subject: media: Add Y210, Y212 and Y216 formats Add Y210, Y212 and Y216 formats. Signed-off-by: Tomi Valkeinen Reviewed-by: Laurent Pinchart Acked-by: Mauro Carvalho Chehab Acked-by: Hans Verkuil Signed-off-by: Laurent Pinchart --- .../userspace-api/media/v4l/pixfmt-packed-yuv.rst | 49 +++++++++++++++++++++- drivers/media/v4l2-core/v4l2-ioctl.c | 3 ++ include/uapi/linux/videodev2.h | 8 ++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst b/Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst index bf283a1b5581..24a771542059 100644 --- a/Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst +++ b/Documentation/userspace-api/media/v4l/pixfmt-packed-yuv.rst @@ -262,7 +262,12 @@ the second byte and Y'\ :sub:`7-0` in the third byte. ================= These formats, commonly referred to as YUYV or YUY2, subsample the chroma -components horizontally by 2, storing 2 pixels in 4 bytes. +components horizontally by 2, storing 2 pixels in a container. The container +is 32-bits for 8-bit formats, and 64-bits for 10+-bit formats. + +The packed YUYV formats with more than 8 bits per component are stored as four +16-bit little-endian words. Each word's most significant bits contain one +component, and the least significant bits are zero padding. .. raw:: latex @@ -270,7 +275,7 @@ components horizontally by 2, storing 2 pixels in 4 bytes. .. tabularcolumns:: |p{3.4cm}|p{1.2cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}| -.. flat-table:: Packed YUV 4:2:2 Formats +.. flat-table:: Packed YUV 4:2:2 Formats in 32-bit container :header-rows: 1 :stub-columns: 0 @@ -337,6 +342,46 @@ components horizontally by 2, storing 2 pixels in 4 bytes. - Y'\ :sub:`3` - Cb\ :sub:`2` +.. tabularcolumns:: |p{3.4cm}|p{1.2cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}|p{0.8cm}| + +.. flat-table:: Packed YUV 4:2:2 Formats in 64-bit container + :header-rows: 1 + :stub-columns: 0 + + * - Identifier + - Code + - Word 0 + - Word 1 + - Word 2 + - Word 3 + * .. _V4L2-PIX-FMT-Y210: + + - ``V4L2_PIX_FMT_Y210`` + - 'Y210' + + - Y'\ :sub:`0` (bits 15-6) + - Cb\ :sub:`0` (bits 15-6) + - Y'\ :sub:`1` (bits 15-6) + - Cr\ :sub:`0` (bits 15-6) + * .. _V4L2-PIX-FMT-Y212: + + - ``V4L2_PIX_FMT_Y212`` + - 'Y212' + + - Y'\ :sub:`0` (bits 15-4) + - Cb\ :sub:`0` (bits 15-4) + - Y'\ :sub:`1` (bits 15-4) + - Cr\ :sub:`0` (bits 15-4) + * .. _V4L2-PIX-FMT-Y216: + + - ``V4L2_PIX_FMT_Y216`` + - 'Y216' + + - Y'\ :sub:`0` (bits 15-0) + - Cb\ :sub:`0` (bits 15-0) + - Y'\ :sub:`1` (bits 15-0) + - Cr\ :sub:`0` (bits 15-0) + .. raw:: latex \normalsize diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index b58615bab790..2a0139c72d29 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -1445,6 +1445,9 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt) case V4L2_PIX_FMT_NV12M_8L128: descr = "NV12M (8x128 Linear)"; break; case V4L2_PIX_FMT_NV12_10BE_8L128: descr = "10-bit NV12 (8x128 Linear, BE)"; break; case V4L2_PIX_FMT_NV12M_10BE_8L128: descr = "10-bit NV12M (8x128 Linear, BE)"; break; + case V4L2_PIX_FMT_Y210: descr = "10-bit YUYV Packed"; break; + case V4L2_PIX_FMT_Y212: descr = "12-bit YUYV Packed"; break; + case V4L2_PIX_FMT_Y216: descr = "16-bit YUYV Packed"; break; default: /* Compressed formats */ diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index a573aea5acbd..17a9b975177a 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -621,6 +621,14 @@ struct v4l2_pix_format { #define V4L2_PIX_FMT_YUVX32 v4l2_fourcc('Y', 'U', 'V', 'X') /* 32 YUVX-8-8-8-8 */ #define V4L2_PIX_FMT_M420 v4l2_fourcc('M', '4', '2', '0') /* 12 YUV 4:2:0 2 lines y, 1 line uv interleaved */ +/* + * YCbCr packed format. For each Y2xx format, xx bits of valid data occupy the MSBs + * of the 16 bit components, and 16-xx bits of zero padding occupy the LSBs. + */ +#define V4L2_PIX_FMT_Y210 v4l2_fourcc('Y', '2', '1', '0') /* 32 YUYV 4:2:2 */ +#define V4L2_PIX_FMT_Y212 v4l2_fourcc('Y', '2', '1', '2') /* 32 YUYV 4:2:2 */ +#define V4L2_PIX_FMT_Y216 v4l2_fourcc('Y', '2', '1', '6') /* 32 YUYV 4:2:2 */ + /* two planes -- one Y, one Cr + Cb interleaved */ #define V4L2_PIX_FMT_NV12 v4l2_fourcc('N', 'V', '1', '2') /* 12 Y/CbCr 4:2:0 */ #define V4L2_PIX_FMT_NV21 v4l2_fourcc('N', 'V', '2', '1') /* 12 Y/CrCb 4:2:0 */ -- cgit v1.3.1 From 13bd9b31a969b03c8ec1d4eb0f2b9aebd30ebfd8 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Tue, 24 Jan 2023 02:47:20 +0100 Subject: Revert "netfilter: conntrack: add sctp DATA_SENT state" This reverts commit (bff3d0534804: "netfilter: conntrack: add sctp DATA_SENT state") Using DATA/SACK to detect a new connection on secondary/alternate paths works only on new connections, while a HEARTBEAT is required on connection re-use. It is probably consistent to wait for HEARTBEAT to create a secondary connection in conntrack. Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_sctp.h | 1 - include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 1 - net/netfilter/nf_conntrack_proto_sctp.c | 102 +++++++++------------ net/netfilter/nf_conntrack_standalone.c | 8 -- 4 files changed, 42 insertions(+), 70 deletions(-) (limited to 'include/uapi') diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index c742469afe21..edc6ddab0de6 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -16,7 +16,6 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_HEARTBEAT_SENT, SCTP_CONNTRACK_HEARTBEAT_ACKED, - SCTP_CONNTRACK_DATA_SENT, SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 94e74034706d..6b20fb22717b 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -95,7 +95,6 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, - CTA_TIMEOUT_SCTP_DATA_SENT, __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index c561c1213704..01cf3e06f042 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -60,7 +60,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, - [SCTP_CONNTRACK_DATA_SENT] = 30 SECS, }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 @@ -75,7 +74,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT #define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED -#define sDS SCTP_CONNTRACK_DATA_SENT #define sIV SCTP_CONNTRACK_MAX /* @@ -98,10 +96,9 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. -HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction - opposite to that of the HEARTBEAT/DATA chunk. Secondary connection - is established. -DATA_SENT - We have seen a DATA/SACK in a new flow. +HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to + that of the HEARTBEAT chunk. Secondary connection is + established. */ /* TODO @@ -115,38 +112,36 @@ cookie echoed to closed. */ /* SCTP conntrack state transitions */ -static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = { +static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL}, -/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}, -/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}, -/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV}, -/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA}, -/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA}, -/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA}, +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} } }; @@ -258,11 +253,6 @@ static int sctp_new_state(enum ip_conntrack_dir dir, pr_debug("SCTP_CID_HEARTBEAT_ACK"); i = 10; break; - case SCTP_CID_DATA: - case SCTP_CID_SACK: - pr_debug("SCTP_CID_DATA/SACK"); - i = 11; - break; default: /* Other chunks like DATA or SACK do not change the state */ pr_debug("Unknown chunk type, Will stay in %s\n", @@ -316,9 +306,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, ih->init_tag); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; - } else if (sch->type == SCTP_CID_HEARTBEAT || - sch->type == SCTP_CID_DATA || - sch->type == SCTP_CID_SACK) { + } else if (sch->type == SCTP_CID_HEARTBEAT) { pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; @@ -404,19 +392,19 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, if (!sctp_new(ct, skb, sh, dataoff)) return -NF_ACCEPT; - } else { - /* Check the verification tag (Sec 8.5) */ - if (!test_bit(SCTP_CID_INIT, map) && - !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && - !test_bit(SCTP_CID_COOKIE_ECHO, map) && - !test_bit(SCTP_CID_ABORT, map) && - !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && - !test_bit(SCTP_CID_HEARTBEAT, map) && - !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && - sh->vtag != ct->proto.sctp.vtag[dir]) { - pr_debug("Verification tag check failed\n"); - goto out; - } + } + + /* Check the verification tag (Sec 8.5) */ + if (!test_bit(SCTP_CID_INIT, map) && + !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && + !test_bit(SCTP_CID_COOKIE_ECHO, map) && + !test_bit(SCTP_CID_ABORT, map) && + !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && + !test_bit(SCTP_CID_HEARTBEAT, map) && + !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && + sh->vtag != ct->proto.sctp.vtag[dir]) { + pr_debug("Verification tag check failed\n"); + goto out; } old_state = new_state = SCTP_CONNTRACK_NONE; @@ -483,11 +471,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) { ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED; } - } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) { - if (ct->proto.sctp.vtag[dir] == 0) { - pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir); - ct->proto.sctp.vtag[dir] = sh->vtag; - } } old_state = ct->proto.sctp.state; @@ -708,7 +691,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, - [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0250725e38a4..bca839ab1ae8 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -602,7 +602,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, @@ -893,12 +892,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = { - .procname = "nf_conntrack_sctp_timeout_data_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { @@ -1043,7 +1036,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); XASSIGN(HEARTBEAT_ACKED, sn); - XASSIGN(DATA_SENT, sn); #undef XASSIGN #endif } -- cgit v1.3.1 From a44b7651489f26271ac784b70895e8a85d0cebf4 Mon Sep 17 00:00:00 2001 From: Sriram Yagnaraman Date: Tue, 24 Jan 2023 02:47:21 +0100 Subject: netfilter: conntrack: unify established states for SCTP paths An SCTP endpoint can start an association through a path and tear it down over another one. That means the initial path will not see the shutdown sequence, and the conntrack entry will remain in ESTABLISHED state for 5 days. By merging the HEARTBEAT_ACKED and ESTABLISHED states into one ESTABLISHED state, there remains no difference between a primary or secondary path. The timeout for the merged ESTABLISHED state is set to 210 seconds (hb_interval * max_path_retrans + rto_max). So, even if a path doesn't see the shutdown sequence, it will expire in a reasonable amount of time. With this change in place, there is now more than one state from which we can transition to ESTABLISHED, COOKIE_ECHOED and HEARTBEAT_SENT, so handle the setting of ASSURED bit whenever a state change has happened and the new state is ESTABLISHED. Removed the check for dir==REPLY since the transition to ESTABLISHED can happen only in the reply direction. Fixes: 9fb9cbb1082d ("[NETFILTER]: Add nf_conntrack subsystem.") Signed-off-by: Sriram Yagnaraman Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 10 +-- include/uapi/linux/netfilter/nf_conntrack_sctp.h | 2 +- include/uapi/linux/netfilter/nfnetlink_cttimeout.h | 2 +- net/netfilter/nf_conntrack_proto_sctp.c | 93 +++++++++------------- net/netfilter/nf_conntrack_standalone.c | 8 -- 5 files changed, 44 insertions(+), 71 deletions(-) (limited to 'include/uapi') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 49db1d11d7c4..8b1045c3b59e 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -173,7 +173,9 @@ nf_conntrack_sctp_timeout_cookie_echoed - INTEGER (seconds) default 3 nf_conntrack_sctp_timeout_established - INTEGER (seconds) - default 432000 (5 days) + default 210 + + Default is set to (hb_interval * path_max_retrans + rto_max) nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds) default 0.3 @@ -190,12 +192,6 @@ nf_conntrack_sctp_timeout_heartbeat_sent - INTEGER (seconds) This timeout is used to setup conntrack entry on secondary paths. Default is set to hb_interval. -nf_conntrack_sctp_timeout_heartbeat_acked - INTEGER (seconds) - default 210 - - This timeout is used to setup conntrack entry on secondary paths. - Default is set to (hb_interval * path_max_retrans + rto_max) - nf_conntrack_udp_timeout - INTEGER (seconds) default 30 diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index edc6ddab0de6..2d6f80d75ae7 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -15,7 +15,7 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_RECD, SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_HEARTBEAT_SENT, - SCTP_CONNTRACK_HEARTBEAT_ACKED, + SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */ SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 6b20fb22717b..aa805e6d4e28 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -94,7 +94,7 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, - CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */ __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 01cf3e06f042..945dd40e7077 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -27,22 +27,16 @@ #include #include -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR - - And so for me for SCTP :D -Kiran */ - static const char *const sctp_conntrack_names[] = { - "NONE", - "CLOSED", - "COOKIE_WAIT", - "COOKIE_ECHOED", - "ESTABLISHED", - "SHUTDOWN_SENT", - "SHUTDOWN_RECD", - "SHUTDOWN_ACK_SENT", - "HEARTBEAT_SENT", - "HEARTBEAT_ACKED", + [SCTP_CONNTRACK_NONE] = "NONE", + [SCTP_CONNTRACK_CLOSED] = "CLOSED", + [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", + [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", + [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", + [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", + [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", + [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; #define SECS * HZ @@ -54,12 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = 10 SECS, [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, - [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, + [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, - [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 @@ -73,7 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT -#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED #define sIV SCTP_CONNTRACK_MAX /* @@ -96,9 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. -HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK in the direction opposite to - that of the HEARTBEAT chunk. Secondary connection is - established. */ /* TODO @@ -115,33 +104,33 @@ cookie echoed to closed. static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA}, -/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA}, -/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA}, -/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, } }; @@ -508,8 +497,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } ct->proto.sctp.state = new_state; - if (old_state != new_state) + if (old_state != new_state) { nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + if (new_state == SCTP_CONNTRACK_ESTABLISHED && + !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); + } } spin_unlock_bh(&ct->lock); @@ -523,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && - dir == IP_CT_DIR_REPLY && - new_state == SCTP_CONNTRACK_ESTABLISHED) { - pr_debug("Setting assured bit\n"); - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } - return NF_ACCEPT; out_unlock: diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index bca839ab1ae8..460294bd4b60 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -601,7 +601,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, @@ -886,12 +885,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { @@ -1035,7 +1028,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, XASSIGN(SHUTDOWN_RECD, sn); XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); - XASSIGN(HEARTBEAT_ACKED, sn); #undef XASSIGN #endif } -- cgit v1.3.1 From d70885800c4b87505171216796f91be94d9ed2cf Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Mon, 14 Nov 2022 12:16:37 +0200 Subject: habanalabs: modify export dmabuf API A previous commit deprecated the option to export from handle, leaving the code with no support for devices with virtual memory. This commit modifies the export API in a way that unifies the uAPI to user address for both cases (i.e. with and without MMU support) and add the actual support for devices with virtual memory. Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/habanalabs.h | 9 ++ drivers/misc/habanalabs/common/memory.c | 219 ++++++++++++++++++++++++---- include/uapi/misc/habanalabs.h | 21 ++- 3 files changed, 218 insertions(+), 31 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index e68928b59c1e..ef5a765f3313 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1744,6 +1744,9 @@ struct hl_cs_counters_atomic { * struct hl_dmabuf_priv - a dma-buf private object. * @dmabuf: pointer to dma-buf object. * @ctx: pointer to the dma-buf owner's context. + * @phys_pg_pack: pointer to physical page pack if the dma-buf was exported + * where virtual memory is supported. + * @memhash_hnode: pointer to the memhash node. this object holds the export count. * @device_address: physical address of the device's memory. Relevant only * if phys_pg_pack is NULL (dma-buf was exported from address). * The total size can be taken from the dmabuf object. @@ -1751,6 +1754,8 @@ struct hl_cs_counters_atomic { struct hl_dmabuf_priv { struct dma_buf *dmabuf; struct hl_ctx *ctx; + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_vm_hash_node *memhash_hnode; uint64_t device_address; }; @@ -2078,12 +2083,16 @@ struct hl_cs_parser { * hl_userptr). * @node: node to hang on the hash table in context object. * @vaddr: key virtual address. + * @handle: memory handle for device memory allocation. * @ptr: value pointer (hl_vm_phys_pg_list or hl_userptr). + * @export_cnt: number of exports from within the VA block. */ struct hl_vm_hash_node { struct hlist_node node; u64 vaddr; + u64 handle; void *ptr; + int export_cnt; }; /** diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index ba7b2ee09408..04a04b50ce87 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -19,7 +19,9 @@ MODULE_IMPORT_NS(DMA_BUF); #define HL_MMU_DEBUG 0 /* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */ -#define DRAM_POOL_PAGE_SIZE SZ_8M +#define DRAM_POOL_PAGE_SIZE SZ_8M + +#define MEM_HANDLE_INVALID ULONG_MAX static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle); @@ -1234,6 +1236,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device hnode->ptr = vm_type; hnode->vaddr = ret_vaddr; + hnode->handle = is_userptr ? MEM_HANDLE_INVALID : handle; mutex_lock(&ctx->mem_hash_lock); hash_add(ctx->mem_hash, &hnode->node, ret_vaddr); @@ -1307,6 +1310,12 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, return -EINVAL; } + if (hnode->export_cnt) { + mutex_unlock(&ctx->mem_hash_lock); + dev_err(hdev->dev, "failed to unmap %#llx, memory is exported\n", vaddr); + return -EINVAL; + } + hash_del(&hnode->node); mutex_unlock(&ctx->mem_hash_lock); @@ -1694,19 +1703,29 @@ static struct sg_table *hl_map_dmabuf(struct dma_buf_attachment *attachment, enum dma_data_direction dir) { struct dma_buf *dma_buf = attachment->dmabuf; + struct hl_vm_phys_pg_pack *phys_pg_pack; struct hl_dmabuf_priv *hl_dmabuf; struct hl_device *hdev; struct sg_table *sgt; hl_dmabuf = dma_buf->priv; hdev = hl_dmabuf->ctx->hdev; + phys_pg_pack = hl_dmabuf->phys_pg_pack; if (!attachment->peer2peer) { dev_dbg(hdev->dev, "Failed to map dmabuf because p2p is disabled\n"); return ERR_PTR(-EPERM); } - sgt = alloc_sgt_from_device_pages(hdev, + if (phys_pg_pack) + sgt = alloc_sgt_from_device_pages(hdev, + phys_pg_pack->pages, + phys_pg_pack->npages, + phys_pg_pack->page_size, + attachment->dev, + dir); + else + sgt = alloc_sgt_from_device_pages(hdev, &hl_dmabuf->device_address, 1, hl_dmabuf->dmabuf->size, @@ -1747,8 +1766,15 @@ static void hl_unmap_dmabuf(struct dma_buf_attachment *attachment, static void hl_release_dmabuf(struct dma_buf *dmabuf) { struct hl_dmabuf_priv *hl_dmabuf = dmabuf->priv; + struct hl_ctx *ctx = hl_dmabuf->ctx; + + if (hl_dmabuf->memhash_hnode) { + mutex_lock(&ctx->mem_hash_lock); + hl_dmabuf->memhash_hnode->export_cnt--; + mutex_unlock(&ctx->mem_hash_lock); + } - hl_ctx_put(hl_dmabuf->ctx); + hl_ctx_put(ctx); kfree(hl_dmabuf); } @@ -1797,11 +1823,8 @@ err_dma_buf_put: return rc; } -static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 size) +static int validate_export_params_common(struct hl_device *hdev, u64 device_addr, u64 size) { - struct asic_fixed_properties *prop = &hdev->asic_prop; - u64 bar_address; - if (!IS_ALIGNED(device_addr, PAGE_SIZE)) { dev_dbg(hdev->dev, "exported device memory address 0x%llx should be aligned to 0x%lx\n", @@ -1816,6 +1839,19 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s return -EINVAL; } + return 0; +} + +static int validate_export_params_no_mmu(struct hl_device *hdev, u64 device_addr, u64 size) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_address; + int rc; + + rc = validate_export_params_common(hdev, device_addr, size); + if (rc) + return rc; + if (device_addr < prop->dram_user_base_address || (device_addr + size) > prop->dram_end_address || (device_addr + size) < device_addr) { @@ -1838,12 +1874,115 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s return 0; } +static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 size, u64 offset, + struct hl_vm_phys_pg_pack *phys_pg_pack) +{ + struct asic_fixed_properties *prop = &hdev->asic_prop; + u64 bar_address; + int i, rc; + + rc = validate_export_params_common(hdev, device_addr, size); + if (rc) + return rc; + + if ((offset + size) > phys_pg_pack->total_size) { + dev_dbg(hdev->dev, "offset %#llx and size %#llx exceed total map size %#llx\n", + offset, size, phys_pg_pack->total_size); + return -EINVAL; + } + + for (i = 0 ; i < phys_pg_pack->npages ; i++) { + + bar_address = hdev->dram_pci_bar_start + + (phys_pg_pack->pages[i] - prop->dram_base_address); + + if ((bar_address + phys_pg_pack->page_size) > + (hdev->dram_pci_bar_start + prop->dram_pci_bar_size) || + (bar_address + phys_pg_pack->page_size) < bar_address) { + dev_dbg(hdev->dev, + "DRAM memory range 0x%llx (+0x%x) is outside of PCI BAR boundaries\n", + phys_pg_pack->pages[i], + phys_pg_pack->page_size); + + return -EINVAL; + } + } + + return 0; +} + +static struct hl_vm_hash_node *memhash_node_export_get(struct hl_ctx *ctx, u64 addr) +{ + struct hl_device *hdev = ctx->hdev; + struct hl_vm_hash_node *hnode; + + /* get the memory handle */ + mutex_lock(&ctx->mem_hash_lock); + hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)addr) + if (addr == hnode->vaddr) + break; + + if (!hnode) { + mutex_unlock(&ctx->mem_hash_lock); + dev_dbg(hdev->dev, "map address %#llx not found\n", addr); + return ERR_PTR(-EINVAL); + } + + if (upper_32_bits(hnode->handle)) { + mutex_unlock(&ctx->mem_hash_lock); + dev_dbg(hdev->dev, "invalid handle %#llx for map address %#llx\n", + hnode->handle, addr); + return ERR_PTR(-EINVAL); + } + + /* + * node found, increase export count so this memory cannot be unmapped + * and the hash node cannot be deleted. + */ + hnode->export_cnt++; + mutex_unlock(&ctx->mem_hash_lock); + + return hnode; +} + +static void memhash_node_export_put(struct hl_ctx *ctx, struct hl_vm_hash_node *hnode) +{ + mutex_lock(&ctx->mem_hash_lock); + hnode->export_cnt--; + mutex_unlock(&ctx->mem_hash_lock); +} + +static struct hl_vm_phys_pg_pack *get_phys_pg_pack_from_hash_node(struct hl_device *hdev, + struct hl_vm_hash_node *hnode) +{ + struct hl_vm_phys_pg_pack *phys_pg_pack; + struct hl_vm *vm = &hdev->vm; + + spin_lock(&vm->idr_lock); + phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, (u32) hnode->handle); + if (!phys_pg_pack) { + spin_unlock(&vm->idr_lock); + dev_dbg(hdev->dev, "no match for handle 0x%x\n", (u32) hnode->handle); + return ERR_PTR(-EINVAL); + } + + spin_unlock(&vm->idr_lock); + + if (phys_pg_pack->vm_type != VM_TYPE_PHYS_PACK) { + dev_dbg(hdev->dev, "handle 0x%llx does not represent DRAM memory\n", hnode->handle); + return ERR_PTR(-EINVAL); + } + + return phys_pg_pack; +} + /** * export_dmabuf_from_addr() - export a dma-buf object for the given memory * address and size. * @ctx: pointer to the context structure. - * @device_addr: device memory physical address. - * @size: size of device memory. + * @addr: device address. + * @size: size of device memory to export. + * @offset: the offset into the buffer from which to start exporting * @flags: DMA-BUF file/FD flags. * @dmabuf_fd: pointer to result FD that represents the dma-buf object. * @@ -1853,37 +1992,66 @@ static int validate_export_params(struct hl_device *hdev, u64 device_addr, u64 s * * Return: 0 on success, non-zero for failure. */ -static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 device_addr, - u64 size, int flags, int *dmabuf_fd) +static int export_dmabuf_from_addr(struct hl_ctx *ctx, u64 addr, u64 size, u64 offset, + int flags, int *dmabuf_fd) { - struct hl_dmabuf_priv *hl_dmabuf; - struct hl_device *hdev = ctx->hdev; + struct hl_vm_phys_pg_pack *phys_pg_pack = NULL; + struct hl_vm_hash_node *hnode = NULL; struct asic_fixed_properties *prop; + struct hl_dmabuf_priv *hl_dmabuf; + struct hl_device *hdev; + u64 export_addr; int rc; + hdev = ctx->hdev; prop = &hdev->asic_prop; - if (prop->dram_supports_virtual_memory) { - dev_dbg(hdev->dev, "Export not supported for devices with virtual memory\n"); - return -EOPNOTSUPP; + /* offset must be 0 in devices without virtual memory support */ + if (!prop->dram_supports_virtual_memory && offset) { + dev_dbg(hdev->dev, "offset is not allowed in device without virtual memory\n"); + return -EINVAL; } - rc = validate_export_params(hdev, device_addr, size); - if (rc) - return rc; + export_addr = addr + offset; hl_dmabuf = kzalloc(sizeof(*hl_dmabuf), GFP_KERNEL); if (!hl_dmabuf) return -ENOMEM; - hl_dmabuf->device_address = device_addr; + if (prop->dram_supports_virtual_memory) { + hnode = memhash_node_export_get(ctx, addr); + if (IS_ERR(hnode)) { + rc = PTR_ERR(hnode); + goto err_free_dmabuf_wrapper; + } + phys_pg_pack = get_phys_pg_pack_from_hash_node(hdev, hnode); + if (IS_ERR(phys_pg_pack)) { + rc = PTR_ERR(phys_pg_pack); + goto dec_memhash_export_cnt; + } + rc = validate_export_params(hdev, export_addr, size, offset, phys_pg_pack); + if (rc) + goto dec_memhash_export_cnt; + + hl_dmabuf->phys_pg_pack = phys_pg_pack; + hl_dmabuf->memhash_hnode = hnode; + } else { + rc = validate_export_params_no_mmu(hdev, export_addr, size); + if (rc) + goto err_free_dmabuf_wrapper; + } + + hl_dmabuf->device_address = export_addr; rc = export_dmabuf(ctx, hl_dmabuf, size, flags, dmabuf_fd); if (rc) - goto err_free_dmabuf_wrapper; + goto dec_memhash_export_cnt; return 0; +dec_memhash_export_cnt: + if (prop->dram_supports_virtual_memory) + memhash_node_export_put(ctx, hnode); err_free_dmabuf_wrapper: kfree(hl_dmabuf); return rc; @@ -2160,10 +2328,11 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) case HL_MEM_OP_EXPORT_DMABUF_FD: rc = export_dmabuf_from_addr(ctx, - args->in.export_dmabuf_fd.handle, - args->in.export_dmabuf_fd.mem_size, - args->in.flags, - &dmabuf_fd); + args->in.export_dmabuf_fd.addr, + args->in.export_dmabuf_fd.mem_size, + args->in.export_dmabuf_fd.offset, + args->in.flags, + &dmabuf_fd); memset(args, 0, sizeof(*args)); args->out.fd = dmabuf_fd; break; diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index 3b995e841eb8..c67d18901c1d 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -1851,15 +1851,24 @@ struct hl_mem_in { /** * structure for exporting DMABUF object (used with * the HL_MEM_OP_EXPORT_DMABUF_FD op) - * @handle: handle returned from HL_MEM_OP_ALLOC. - * in Gaudi, where we don't have MMU for the device memory, the - * driver expects a physical address (instead of a handle) in the - * device memory space. - * @mem_size: size of memory allocation. Relevant only for GAUDI + * @addr: for Gaudi1, the driver expects a physical address + * inside the device's DRAM. this is because in Gaudi1 + * we don't have MMU that covers the device's DRAM. + * for all other ASICs, the driver expects a device + * virtual address that represents the start address of + * a mapped DRAM memory area inside the device. + * the address must be the same as was received from the + * driver during a previous HL_MEM_OP_MAP operation. + * @mem_size: size of memory to export. + * @offset: for Gaudi1, this value must be 0. For all other ASICs, + * the driver expects an offset inside of the memory area + * describe by addr. the offset represents the start + * address of that the exported dma-buf object describes. */ struct { - __u64 handle; + __u64 addr; __u64 mem_size; + __u64 offset; } export_dmabuf_fd; }; -- cgit v1.3.1 From c2239a251d2d738f435c46bae7d66899c62ce493 Mon Sep 17 00:00:00 2001 From: farah kassabri Date: Wed, 16 Nov 2022 15:40:30 +0200 Subject: habanalabs: pass-through request from user to f/w Add a uAPI, as part of the INFO IOCTL, to allow users to send requests directly to f/w, according to a pre-defined set of opcodes that the f/w exposes. The f/w will put the result in a kernel-allocated buffer, which the driver will then copy to the user-supplied buffer. This will allow f/w tools to communicate directly with the f/w without the need to add a new uAPI to the driver for each new type of request. Signed-off-by: farah kassabri Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/firmware_if.c | 24 +++++++++ drivers/misc/habanalabs/common/habanalabs.h | 2 + drivers/misc/habanalabs/common/habanalabs_ioctl.c | 51 ++++++++++++++++++ drivers/misc/habanalabs/include/common/cpucp_if.h | 65 ++++++++++++++++++++--- include/uapi/misc/habanalabs.h | 4 ++ 5 files changed, 139 insertions(+), 7 deletions(-) (limited to 'include/uapi') diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index a8911e15d937..eb000e035026 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -3145,3 +3145,27 @@ int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_in sizeof(struct cpucp_sec_attest_info), nonce, HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); } + +int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode, + dma_addr_t buff, u32 *size) +{ + struct cpucp_packet pkt = {0}; + u64 result; + int rc = 0; + + pkt.ctl = cpu_to_le32(CPUCP_PACKET_GENERIC_PASSTHROUGH << CPUCP_PKT_CTL_OPCODE_SHIFT); + pkt.addr = cpu_to_le64(buff); + pkt.data_max_size = cpu_to_le32(*size); + pkt.pkt_subidx = cpu_to_le32(sub_opcode); + + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *)&pkt, sizeof(pkt), + HL_CPUCP_INFO_TIMEOUT_USEC, &result); + if (rc) + dev_err(hdev->dev, "failed to send CPUCP data of generic fw pkt\n"); + else + dev_dbg(hdev->dev, "generic pkt was successful, result: 0x%llx\n", result); + + *size = (u32)result; + + return rc; +} diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 210dd607e18b..c609b2e44ad3 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -3790,6 +3790,8 @@ int hl_fw_get_clk_rate(struct hl_device *hdev, u32 *cur_clk, u32 *max_clk); void hl_fw_set_pll_profile(struct hl_device *hdev); void hl_sysfs_add_dev_clk_attr(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp); void hl_sysfs_add_dev_vrm_attr(struct hl_device *hdev, struct attribute_group *dev_vrm_attr_grp); +int hl_fw_send_generic_request(struct hl_device *hdev, enum hl_passthrough_type sub_opcode, + dma_addr_t buff, u32 *size); void hw_sob_get(struct hl_hw_sob *hw_sob); void hw_sob_put(struct hl_hw_sob *hw_sob); diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index b6abfa7761a7..4d642987ad02 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -820,6 +820,54 @@ static int user_mappings_info(struct hl_fpriv *hpriv, struct hl_info_args *args) ? -EFAULT : 0; } +static int send_fw_generic_request(struct hl_device *hdev, struct hl_info_args *info_args) +{ + void __user *buff = (void __user *) (uintptr_t) info_args->return_pointer; + u32 size = info_args->return_size; + dma_addr_t dma_handle; + bool need_input_buff; + void *fw_buff; + int rc = 0; + + switch (info_args->fw_sub_opcode) { + case HL_PASSTHROUGH_VERSIONS: + need_input_buff = false; + break; + default: + return -EINVAL; + } + + if (size > SZ_1M) { + dev_err(hdev->dev, "buffer size cannot exceed 1MB\n"); + return -EINVAL; + } + + fw_buff = hl_cpu_accessible_dma_pool_alloc(hdev, size, &dma_handle); + if (!fw_buff) + return -ENOMEM; + + + if (need_input_buff && copy_from_user(fw_buff, buff, size)) { + dev_dbg(hdev->dev, "Failed to copy from user FW buff\n"); + rc = -EFAULT; + goto free_buff; + } + + rc = hl_fw_send_generic_request(hdev, info_args->fw_sub_opcode, dma_handle, &size); + if (rc) + goto free_buff; + + if (copy_to_user(buff, fw_buff, min(size, info_args->return_size))) { + dev_dbg(hdev->dev, "Failed to copy to user FW generic req output\n"); + rc = -EFAULT; + } + +free_buff: + hl_cpu_accessible_dma_pool_free(hdev, info_args->return_size, fw_buff); + + return rc; +} + static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, struct device *dev) { @@ -947,6 +995,9 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, case HL_INFO_ENGINE_STATUS: return engine_status_info(hpriv, args); + case HL_INFO_FW_GENERIC_REQ: + return send_fw_generic_request(hdev, args); + default: dev_err(dev, "Invalid request %d\n", args->op); rc = -EINVAL; diff --git a/drivers/misc/habanalabs/include/common/cpucp_if.h b/drivers/misc/habanalabs/include/common/cpucp_if.h index baa5aa43b6f4..0a66b7f85164 100644 --- a/drivers/misc/habanalabs/include/common/cpucp_if.h +++ b/drivers/misc/habanalabs/include/common/cpucp_if.h @@ -643,6 +643,10 @@ enum pq_init_status { * data corruption in case of mismatched driver/FW versions. * Relevant only to Gaudi. * + * * CPUCP_PACKET_GENERIC_PASSTHROUGH - + * Generic opcode for all firmware info that is only passed to host + * through the LKD, without getting parsed there. + * * CPUCP_PACKET_ACTIVE_STATUS_SET - * LKD sends FW indication whether device is free or in use, this indication is reported * also to the BMC. @@ -704,9 +708,12 @@ enum cpucp_packet_id { CPUCP_PACKET_RESERVED5, /* not used */ CPUCP_PACKET_RESERVED6, /* not used */ CPUCP_PACKET_RESERVED7, /* not used */ + CPUCP_PACKET_GENERIC_PASSTHROUGH, /* IOCTL */ CPUCP_PACKET_RESERVED8, /* not used */ - CPUCP_PACKET_RESERVED9, /* not used */ CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ + CPUCP_PACKET_RESERVED9, /* not used */ + CPUCP_PACKET_RESERVED10, /* not used */ + CPUCP_PACKET_RESERVED11, /* not used */ CPUCP_PACKET_ID_MAX /* must be last */ }; @@ -805,8 +812,13 @@ struct cpucp_packet { __le32 nonce; }; - /* For NIC requests */ - __le32 port_index; + union { + /* For NIC requests */ + __le32 port_index; + + /* For Generic packet sub index */ + __le32 pkt_subidx; + }; }; struct cpucp_unmask_irq_arr_packet { @@ -976,6 +988,11 @@ enum pll_index { IC_PLL = 16, MC_PLL = 17, EMMC_PLL = 18, + D2D_PLL = 19, + CS_PLL = 20, + C2C_PLL = 21, + NCH_PLL = 22, + C2M_PLL = 23, PLL_MAX }; @@ -1135,8 +1152,9 @@ enum cpucp_serdes_type { HLS1_SERDES_TYPE, HLS1H_SERDES_TYPE, HLS2_SERDES_TYPE, - UNKNOWN_SERDES_TYPE, - MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE + HLS2_TYPE_1_SERDES_TYPE, + MAX_NUM_SERDES_TYPE, /* number of types */ + UNKNOWN_SERDES_TYPE = 0xFFFF /* serdes_type is u16 */ }; struct cpucp_nic_info { @@ -1160,6 +1178,21 @@ struct page_discard_info { __le32 mmu_page_idx[PAGE_DISCARD_MAX]; }; +/* + * struct frac_val - fracture value represented by "integer.frac". + * @integer: the integer part of the fracture value; + * @frac: the fracture part of the fracture value. + */ +struct frac_val { + union { + struct { + __le16 integer; + __le16 frac; + }; + __le32 val; + }; +}; + /* * struct ser_val - the SER (symbol error rate) value is represented by "integer * 10 ^ -exp". * @integer: the integer part of the SER value; @@ -1183,8 +1216,12 @@ struct ser_val { * @pcs_link: has PCS link. * @phy_ready: is PHY ready. * @auto_neg: is Autoneg enabled. - * @timeout_retransmission_cnt: timeout retransmission events - * @high_ber_cnt: high ber events + * @timeout_retransmission_cnt: timeout retransmission events. + * @high_ber_cnt: high ber events. + * @pre_fec_ser: pre FEC SER value. + * @post_fec_ser: post FEC SER value. + * @throughput: measured throughput. + * @latency: measured latency. */ struct cpucp_nic_status { __le32 port; @@ -1200,6 +1237,10 @@ struct cpucp_nic_status { __u8 auto_neg; __le32 timeout_retransmission_cnt; __le32 high_ber_cnt; + struct ser_val pre_fec_ser; + struct ser_val post_fec_ser; + struct frac_val bandwidth; + struct frac_val lat; }; enum cpucp_hbm_row_replace_cause { @@ -1317,4 +1358,14 @@ struct cpucp_monitor_dump { struct dcore_monitor_regs_data sync_mngr_e_n; }; +/* + * The Type of the generic request (and other input arguments) will be fetched from user by reading + * from "pkt_subidx" field in struct cpucp_packet. + * + * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. + */ +enum hl_passthrough_type { + HL_PASSTHROUGH_VERSIONS, +}; + #endif /* CPUCP_IF_H */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h index c67d18901c1d..90e628779264 100644 --- a/include/uapi/misc/habanalabs.h +++ b/include/uapi/misc/habanalabs.h @@ -789,6 +789,7 @@ enum hl_server_type { * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. + * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. */ #define HL_INFO_HW_IP_INFO 0 #define HL_INFO_HW_EVENTS 1 @@ -822,6 +823,7 @@ enum hl_server_type { #define HL_INFO_ENGINE_STATUS 32 #define HL_INFO_PAGE_FAULT_EVENT 33 #define HL_INFO_USER_MAPPINGS 34 +#define HL_INFO_FW_GENERIC_REQ 35 #define HL_INFO_VERSION_MAX_LEN 128 #define HL_INFO_CARD_NAME_MAX_LEN 16 @@ -1258,6 +1260,7 @@ enum gaudi_dcores { * @sec_attest_nonce: Nonce number used for attestation report. * @array_size: Number of array members copied to user buffer. * Relevant for HL_INFO_USER_MAPPINGS info ioctl. + * @fw_sub_opcode: generic requests sub opcodes. * @pad: Padding to 64 bit. */ struct hl_info_args { @@ -1274,6 +1277,7 @@ struct hl_info_args { __u32 user_buffer_actual_size; __u32 sec_attest_nonce; __u32 array_size; + __u32 fw_sub_opcode; }; __u32 pad; -- cgit v1.3.1 From 7d25cae7abf4505129f92dc581789c330640564d Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Tue, 20 Dec 2022 14:12:19 +0200 Subject: habanalabs/uapi: move uapi file to drm Move the habanalabs.h uapi file from include/uapi/misc to include/uapi/drm, and rename it to habanalabs_accel.h. This is required before moving the actual driver to the accel subsystem. Update MAINTAINERS file accordingly. Signed-off-by: Oded Gabbay --- MAINTAINERS | 2 +- drivers/misc/habanalabs/Kconfig | 2 +- drivers/misc/habanalabs/common/command_buffer.c | 2 +- .../misc/habanalabs/common/command_submission.c | 2 +- drivers/misc/habanalabs/common/device.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 2 +- drivers/misc/habanalabs/common/habanalabs_ioctl.c | 2 +- drivers/misc/habanalabs/common/memory.c | 2 +- drivers/misc/habanalabs/common/state_dump.c | 2 +- drivers/misc/habanalabs/gaudi/gaudiP.h | 2 +- drivers/misc/habanalabs/gaudi/gaudi_coresight.c | 3 +- drivers/misc/habanalabs/gaudi2/gaudi2P.h | 2 +- drivers/misc/habanalabs/gaudi2/gaudi2_coresight.c | 2 +- drivers/misc/habanalabs/goya/goyaP.h | 2 +- drivers/misc/habanalabs/goya/goya_coresight.c | 2 +- include/uapi/drm/habanalabs_accel.h | 2225 ++++++++++++++++++++ include/uapi/misc/habanalabs.h | 2225 -------------------- 17 files changed, 2241 insertions(+), 2240 deletions(-) create mode 100644 include/uapi/drm/habanalabs_accel.h delete mode 100644 include/uapi/misc/habanalabs.h (limited to 'include/uapi') diff --git a/MAINTAINERS b/MAINTAINERS index 2758e7a64fa5..46976edebbc7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9038,7 +9038,7 @@ F: Documentation/ABI/testing/debugfs-driver-habanalabs F: Documentation/ABI/testing/sysfs-driver-habanalabs F: drivers/misc/habanalabs/ F: include/trace/events/habanalabs.h -F: include/uapi/misc/habanalabs.h +F: include/uapi/drm/habanalabs_accel.h HACKRF MEDIA DRIVER M: Antti Palosaari diff --git a/drivers/misc/habanalabs/Kconfig b/drivers/misc/habanalabs/Kconfig index bd01d0d940c0..585235531b9b 100644 --- a/drivers/misc/habanalabs/Kconfig +++ b/drivers/misc/habanalabs/Kconfig @@ -19,7 +19,7 @@ config HABANA_AI the user to submit workloads to the devices. The user-space interface is described in - include/uapi/misc/habanalabs.h + include/uapi/drm/habanalabs_accel.h If unsure, say N. diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index 24100501f8ca..6263d01cb9c1 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -5,7 +5,7 @@ * All Rights Reserved. */ -#include +#include #include "habanalabs.h" #include diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c index 1543ef993f8e..f6ee10334235 100644 --- a/drivers/misc/habanalabs/common/command_submission.c +++ b/drivers/misc/habanalabs/common/command_submission.c @@ -5,7 +5,7 @@ * All Rights Reserved. */ -#include +#include #include "habanalabs.h" #include diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 71f958a2e91b..6620580e9ba8 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -7,7 +7,7 @@ #define pr_fmt(fmt) "habanalabs: " fmt -#include +#include #include "habanalabs.h" #include diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index c609b2e44ad3..7b6f10033ee9 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -11,7 +11,7 @@ #include "../include/common/cpucp_if.h" #include "../include/common/qman_if.h" #include "../include/hw_ip/mmu/mmu_general.h" -#include +#include #include #include diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c index 4d642987ad02..079483421e12 100644 --- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c +++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c @@ -7,7 +7,7 @@ #define pr_fmt(fmt) "habanalabs: " fmt -#include +#include #include "habanalabs.h" #include diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index a2d24c9a3d1e..1c38fab39337 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -5,7 +5,7 @@ * All Rights Reserved. */ -#include +#include #include "habanalabs.h" #include "../include/hw_ip/mmu/mmu_general.h" diff --git a/drivers/misc/habanalabs/common/state_dump.c b/drivers/misc/habanalabs/common/state_dump.c index 74726907c95e..3a9931f24259 100644 --- a/drivers/misc/habanalabs/common/state_dump.c +++ b/drivers/misc/habanalabs/common/state_dump.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include "habanalabs.h" /** diff --git a/drivers/misc/habanalabs/gaudi/gaudiP.h b/drivers/misc/habanalabs/gaudi/gaudiP.h index 4fbcf3f0afe5..3d88d56c8eb3 100644 --- a/drivers/misc/habanalabs/gaudi/gaudiP.h +++ b/drivers/misc/habanalabs/gaudi/gaudiP.h @@ -8,7 +8,7 @@ #ifndef GAUDIP_H_ #define GAUDIP_H_ -#include +#include #include "../common/habanalabs.h" #include "../include/common/hl_boot_if.h" #include "../include/gaudi/gaudi_packets.h" diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 08108f5fed67..3455b14554c6 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -11,7 +11,8 @@ #include "../include/gaudi/gaudi_masks.h" #include "../include/gaudi/gaudi_reg_map.h" -#include +#include + #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET #define SPMU_EVENT_TYPES_OFFSET 0x400 #define SPMU_MAX_COUNTERS 6 diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2P.h b/drivers/misc/habanalabs/gaudi2/gaudi2P.h index b4383c199bbb..ed09864c2dfc 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2P.h +++ b/drivers/misc/habanalabs/gaudi2/gaudi2P.h @@ -8,7 +8,7 @@ #ifndef GAUDI2P_H_ #define GAUDI2P_H_ -#include +#include #include "../common/habanalabs.h" #include "../include/common/hl_boot_if.h" #include "../include/gaudi2/gaudi2.h" diff --git a/drivers/misc/habanalabs/gaudi2/gaudi2_coresight.c b/drivers/misc/habanalabs/gaudi2/gaudi2_coresight.c index 1df7a59e4101..1dfbe293ecec 100644 --- a/drivers/misc/habanalabs/gaudi2/gaudi2_coresight.c +++ b/drivers/misc/habanalabs/gaudi2/gaudi2_coresight.c @@ -5,7 +5,7 @@ * All Rights Reserved. */ #include "gaudi2_coresight_regs.h" -#include +#include #define GAUDI2_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 2000) #define SPMU_MAX_COUNTERS 6 diff --git a/drivers/misc/habanalabs/goya/goyaP.h b/drivers/misc/habanalabs/goya/goyaP.h index d6ec43d6f6b0..5df3d30b91fd 100644 --- a/drivers/misc/habanalabs/goya/goyaP.h +++ b/drivers/misc/habanalabs/goya/goyaP.h @@ -8,7 +8,7 @@ #ifndef GOYAP_H_ #define GOYAP_H_ -#include +#include #include "../common/habanalabs.h" #include "../include/common/hl_boot_if.h" #include "../include/goya/goya_packets.h" diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 2c5133cfae65..e7ac3046cfaa 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -10,7 +10,7 @@ #include "../include/goya/asic_reg/goya_regs.h" #include "../include/goya/asic_reg/goya_masks.h" -#include +#include #define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100) diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h new file mode 100644 index 000000000000..90e628779264 --- /dev/null +++ b/include/uapi/drm/habanalabs_accel.h @@ -0,0 +1,2225 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note + * + * Copyright 2016-2022 HabanaLabs, Ltd. + * All Rights Reserved. + * + */ + +#ifndef HABANALABS_H_ +#define HABANALABS_H_ + +#include +#include + +/* + * Defines that are asic-specific but constitutes as ABI between kernel driver + * and userspace + */ +#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ +#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ + +/* + * 128 SOBs reserved for collective wait + * 16 SOBs reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 + +/* + * 64 monitors reserved for collective wait + * 8 monitors reserved for sync stream + */ +#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 + +/* Max number of elements in timestamps registration buffers */ +#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ + +/* + * Goya queue Numbering + * + * The external queues (PCI DMA channels) MUST be before the internal queues + * and each group (PCI DMA channels and internal) must be contiguous inside + * itself but there can be a gap between the two groups (although not + * recommended) + */ + +enum goya_queue_id { + GOYA_QUEUE_ID_DMA_0 = 0, + GOYA_QUEUE_ID_DMA_1 = 1, + GOYA_QUEUE_ID_DMA_2 = 2, + GOYA_QUEUE_ID_DMA_3 = 3, + GOYA_QUEUE_ID_DMA_4 = 4, + GOYA_QUEUE_ID_CPU_PQ = 5, + GOYA_QUEUE_ID_MME = 6, /* Internal queues start here */ + GOYA_QUEUE_ID_TPC0 = 7, + GOYA_QUEUE_ID_TPC1 = 8, + GOYA_QUEUE_ID_TPC2 = 9, + GOYA_QUEUE_ID_TPC3 = 10, + GOYA_QUEUE_ID_TPC4 = 11, + GOYA_QUEUE_ID_TPC5 = 12, + GOYA_QUEUE_ID_TPC6 = 13, + GOYA_QUEUE_ID_TPC7 = 14, + GOYA_QUEUE_ID_SIZE +}; + +/* + * Gaudi queue Numbering + * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*. + * Except one CPU queue, all the rest are internal queues. + */ + +enum gaudi_queue_id { + GAUDI_QUEUE_ID_DMA_0_0 = 0, /* external */ + GAUDI_QUEUE_ID_DMA_0_1 = 1, /* external */ + GAUDI_QUEUE_ID_DMA_0_2 = 2, /* external */ + GAUDI_QUEUE_ID_DMA_0_3 = 3, /* external */ + GAUDI_QUEUE_ID_DMA_1_0 = 4, /* external */ + GAUDI_QUEUE_ID_DMA_1_1 = 5, /* external */ + GAUDI_QUEUE_ID_DMA_1_2 = 6, /* external */ + GAUDI_QUEUE_ID_DMA_1_3 = 7, /* external */ + GAUDI_QUEUE_ID_CPU_PQ = 8, /* CPU */ + GAUDI_QUEUE_ID_DMA_2_0 = 9, /* internal */ + GAUDI_QUEUE_ID_DMA_2_1 = 10, /* internal */ + GAUDI_QUEUE_ID_DMA_2_2 = 11, /* internal */ + GAUDI_QUEUE_ID_DMA_2_3 = 12, /* internal */ + GAUDI_QUEUE_ID_DMA_3_0 = 13, /* internal */ + GAUDI_QUEUE_ID_DMA_3_1 = 14, /* internal */ + GAUDI_QUEUE_ID_DMA_3_2 = 15, /* internal */ + GAUDI_QUEUE_ID_DMA_3_3 = 16, /* internal */ + GAUDI_QUEUE_ID_DMA_4_0 = 17, /* internal */ + GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ + GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ + GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ + GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ + GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ + GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ + GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ + GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ + GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ + GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ + GAUDI_QUEUE_ID_DMA_6_3 = 28, /* internal */ + GAUDI_QUEUE_ID_DMA_7_0 = 29, /* internal */ + GAUDI_QUEUE_ID_DMA_7_1 = 30, /* internal */ + GAUDI_QUEUE_ID_DMA_7_2 = 31, /* internal */ + GAUDI_QUEUE_ID_DMA_7_3 = 32, /* internal */ + GAUDI_QUEUE_ID_MME_0_0 = 33, /* internal */ + GAUDI_QUEUE_ID_MME_0_1 = 34, /* internal */ + GAUDI_QUEUE_ID_MME_0_2 = 35, /* internal */ + GAUDI_QUEUE_ID_MME_0_3 = 36, /* internal */ + GAUDI_QUEUE_ID_MME_1_0 = 37, /* internal */ + GAUDI_QUEUE_ID_MME_1_1 = 38, /* internal */ + GAUDI_QUEUE_ID_MME_1_2 = 39, /* internal */ + GAUDI_QUEUE_ID_MME_1_3 = 40, /* internal */ + GAUDI_QUEUE_ID_TPC_0_0 = 41, /* internal */ + GAUDI_QUEUE_ID_TPC_0_1 = 42, /* internal */ + GAUDI_QUEUE_ID_TPC_0_2 = 43, /* internal */ + GAUDI_QUEUE_ID_TPC_0_3 = 44, /* internal */ + GAUDI_QUEUE_ID_TPC_1_0 = 45, /* internal */ + GAUDI_QUEUE_ID_TPC_1_1 = 46, /* internal */ + GAUDI_QUEUE_ID_TPC_1_2 = 47, /* internal */ + GAUDI_QUEUE_ID_TPC_1_3 = 48, /* internal */ + GAUDI_QUEUE_ID_TPC_2_0 = 49, /* internal */ + GAUDI_QUEUE_ID_TPC_2_1 = 50, /* internal */ + GAUDI_QUEUE_ID_TPC_2_2 = 51, /* internal */ + GAUDI_QUEUE_ID_TPC_2_3 = 52, /* internal */ + GAUDI_QUEUE_ID_TPC_3_0 = 53, /* internal */ + GAUDI_QUEUE_ID_TPC_3_1 = 54, /* internal */ + GAUDI_QUEUE_ID_TPC_3_2 = 55, /* internal */ + GAUDI_QUEUE_ID_TPC_3_3 = 56, /* internal */ + GAUDI_QUEUE_ID_TPC_4_0 = 57, /* internal */ + GAUDI_QUEUE_ID_TPC_4_1 = 58, /* internal */ + GAUDI_QUEUE_ID_TPC_4_2 = 59, /* internal */ + GAUDI_QUEUE_ID_TPC_4_3 = 60, /* internal */ + GAUDI_QUEUE_ID_TPC_5_0 = 61, /* internal */ + GAUDI_QUEUE_ID_TPC_5_1 = 62, /* internal */ + GAUDI_QUEUE_ID_TPC_5_2 = 63, /* internal */ + GAUDI_QUEUE_ID_TPC_5_3 = 64, /* internal */ + GAUDI_QUEUE_ID_TPC_6_0 = 65, /* internal */ + GAUDI_QUEUE_ID_TPC_6_1 = 66, /* internal */ + GAUDI_QUEUE_ID_TPC_6_2 = 67, /* internal */ + GAUDI_QUEUE_ID_TPC_6_3 = 68, /* internal */ + GAUDI_QUEUE_ID_TPC_7_0 = 69, /* internal */ + GAUDI_QUEUE_ID_TPC_7_1 = 70, /* internal */ + GAUDI_QUEUE_ID_TPC_7_2 = 71, /* internal */ + GAUDI_QUEUE_ID_TPC_7_3 = 72, /* internal */ + GAUDI_QUEUE_ID_NIC_0_0 = 73, /* internal */ + GAUDI_QUEUE_ID_NIC_0_1 = 74, /* internal */ + GAUDI_QUEUE_ID_NIC_0_2 = 75, /* internal */ + GAUDI_QUEUE_ID_NIC_0_3 = 76, /* internal */ + GAUDI_QUEUE_ID_NIC_1_0 = 77, /* internal */ + GAUDI_QUEUE_ID_NIC_1_1 = 78, /* internal */ + GAUDI_QUEUE_ID_NIC_1_2 = 79, /* internal */ + GAUDI_QUEUE_ID_NIC_1_3 = 80, /* internal */ + GAUDI_QUEUE_ID_NIC_2_0 = 81, /* internal */ + GAUDI_QUEUE_ID_NIC_2_1 = 82, /* internal */ + GAUDI_QUEUE_ID_NIC_2_2 = 83, /* internal */ + GAUDI_QUEUE_ID_NIC_2_3 = 84, /* internal */ + GAUDI_QUEUE_ID_NIC_3_0 = 85, /* internal */ + GAUDI_QUEUE_ID_NIC_3_1 = 86, /* internal */ + GAUDI_QUEUE_ID_NIC_3_2 = 87, /* internal */ + GAUDI_QUEUE_ID_NIC_3_3 = 88, /* internal */ + GAUDI_QUEUE_ID_NIC_4_0 = 89, /* internal */ + GAUDI_QUEUE_ID_NIC_4_1 = 90, /* internal */ + GAUDI_QUEUE_ID_NIC_4_2 = 91, /* internal */ + GAUDI_QUEUE_ID_NIC_4_3 = 92, /* internal */ + GAUDI_QUEUE_ID_NIC_5_0 = 93, /* internal */ + GAUDI_QUEUE_ID_NIC_5_1 = 94, /* internal */ + GAUDI_QUEUE_ID_NIC_5_2 = 95, /* internal */ + GAUDI_QUEUE_ID_NIC_5_3 = 96, /* internal */ + GAUDI_QUEUE_ID_NIC_6_0 = 97, /* internal */ + GAUDI_QUEUE_ID_NIC_6_1 = 98, /* internal */ + GAUDI_QUEUE_ID_NIC_6_2 = 99, /* internal */ + GAUDI_QUEUE_ID_NIC_6_3 = 100, /* internal */ + GAUDI_QUEUE_ID_NIC_7_0 = 101, /* internal */ + GAUDI_QUEUE_ID_NIC_7_1 = 102, /* internal */ + GAUDI_QUEUE_ID_NIC_7_2 = 103, /* internal */ + GAUDI_QUEUE_ID_NIC_7_3 = 104, /* internal */ + GAUDI_QUEUE_ID_NIC_8_0 = 105, /* internal */ + GAUDI_QUEUE_ID_NIC_8_1 = 106, /* internal */ + GAUDI_QUEUE_ID_NIC_8_2 = 107, /* internal */ + GAUDI_QUEUE_ID_NIC_8_3 = 108, /* internal */ + GAUDI_QUEUE_ID_NIC_9_0 = 109, /* internal */ + GAUDI_QUEUE_ID_NIC_9_1 = 110, /* internal */ + GAUDI_QUEUE_ID_NIC_9_2 = 111, /* internal */ + GAUDI_QUEUE_ID_NIC_9_3 = 112, /* internal */ + GAUDI_QUEUE_ID_SIZE +}; + +/* + * In GAUDI2 we have two modes of operation in regard to queues: + * 1. Legacy mode, where each QMAN exposes 4 streams to the user + * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. + * + * When in legacy mode, the user sends the queue id per JOB according to + * enum gaudi2_queue_id below. + * + * When in F/W mode, the user sends a stream id per Command Submission. The + * stream id is a running number from 0 up to (N-1), where N is the number + * of streams the F/W exposes and is passed to the user in + * struct hl_info_hw_ip_info + */ + +enum gaudi2_queue_id { + GAUDI2_QUEUE_ID_PDMA_0_0 = 0, + GAUDI2_QUEUE_ID_PDMA_0_1 = 1, + GAUDI2_QUEUE_ID_PDMA_0_2 = 2, + GAUDI2_QUEUE_ID_PDMA_0_3 = 3, + GAUDI2_QUEUE_ID_PDMA_1_0 = 4, + GAUDI2_QUEUE_ID_PDMA_1_1 = 5, + GAUDI2_QUEUE_ID_PDMA_1_2 = 6, + GAUDI2_QUEUE_ID_PDMA_1_3 = 7, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, + GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, + GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, + GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, + GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, + GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, + GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, + GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, + GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, + GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, + GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, + GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, + GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, + GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, + GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, + GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, + GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, + GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, + GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, + GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, + GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, + GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, + GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, + GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, + GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, + GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, + GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, + GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, + GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, + GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, + GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, + GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, + GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, + GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, + GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, + GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, + GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, + GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, + GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, + GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, + GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, + GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, + GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, + GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, + GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, + GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, + GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, + GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, + GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, + GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, + GAUDI2_QUEUE_ID_NIC_0_0 = 156, + GAUDI2_QUEUE_ID_NIC_0_1 = 157, + GAUDI2_QUEUE_ID_NIC_0_2 = 158, + GAUDI2_QUEUE_ID_NIC_0_3 = 159, + GAUDI2_QUEUE_ID_NIC_1_0 = 160, + GAUDI2_QUEUE_ID_NIC_1_1 = 161, + GAUDI2_QUEUE_ID_NIC_1_2 = 162, + GAUDI2_QUEUE_ID_NIC_1_3 = 163, + GAUDI2_QUEUE_ID_NIC_2_0 = 164, + GAUDI2_QUEUE_ID_NIC_2_1 = 165, + GAUDI2_QUEUE_ID_NIC_2_2 = 166, + GAUDI2_QUEUE_ID_NIC_2_3 = 167, + GAUDI2_QUEUE_ID_NIC_3_0 = 168, + GAUDI2_QUEUE_ID_NIC_3_1 = 169, + GAUDI2_QUEUE_ID_NIC_3_2 = 170, + GAUDI2_QUEUE_ID_NIC_3_3 = 171, + GAUDI2_QUEUE_ID_NIC_4_0 = 172, + GAUDI2_QUEUE_ID_NIC_4_1 = 173, + GAUDI2_QUEUE_ID_NIC_4_2 = 174, + GAUDI2_QUEUE_ID_NIC_4_3 = 175, + GAUDI2_QUEUE_ID_NIC_5_0 = 176, + GAUDI2_QUEUE_ID_NIC_5_1 = 177, + GAUDI2_QUEUE_ID_NIC_5_2 = 178, + GAUDI2_QUEUE_ID_NIC_5_3 = 179, + GAUDI2_QUEUE_ID_NIC_6_0 = 180, + GAUDI2_QUEUE_ID_NIC_6_1 = 181, + GAUDI2_QUEUE_ID_NIC_6_2 = 182, + GAUDI2_QUEUE_ID_NIC_6_3 = 183, + GAUDI2_QUEUE_ID_NIC_7_0 = 184, + GAUDI2_QUEUE_ID_NIC_7_1 = 185, + GAUDI2_QUEUE_ID_NIC_7_2 = 186, + GAUDI2_QUEUE_ID_NIC_7_3 = 187, + GAUDI2_QUEUE_ID_NIC_8_0 = 188, + GAUDI2_QUEUE_ID_NIC_8_1 = 189, + GAUDI2_QUEUE_ID_NIC_8_2 = 190, + GAUDI2_QUEUE_ID_NIC_8_3 = 191, + GAUDI2_QUEUE_ID_NIC_9_0 = 192, + GAUDI2_QUEUE_ID_NIC_9_1 = 193, + GAUDI2_QUEUE_ID_NIC_9_2 = 194, + GAUDI2_QUEUE_ID_NIC_9_3 = 195, + GAUDI2_QUEUE_ID_NIC_10_0 = 196, + GAUDI2_QUEUE_ID_NIC_10_1 = 197, + GAUDI2_QUEUE_ID_NIC_10_2 = 198, + GAUDI2_QUEUE_ID_NIC_10_3 = 199, + GAUDI2_QUEUE_ID_NIC_11_0 = 200, + GAUDI2_QUEUE_ID_NIC_11_1 = 201, + GAUDI2_QUEUE_ID_NIC_11_2 = 202, + GAUDI2_QUEUE_ID_NIC_11_3 = 203, + GAUDI2_QUEUE_ID_NIC_12_0 = 204, + GAUDI2_QUEUE_ID_NIC_12_1 = 205, + GAUDI2_QUEUE_ID_NIC_12_2 = 206, + GAUDI2_QUEUE_ID_NIC_12_3 = 207, + GAUDI2_QUEUE_ID_NIC_13_0 = 208, + GAUDI2_QUEUE_ID_NIC_13_1 = 209, + GAUDI2_QUEUE_ID_NIC_13_2 = 210, + GAUDI2_QUEUE_ID_NIC_13_3 = 211, + GAUDI2_QUEUE_ID_NIC_14_0 = 212, + GAUDI2_QUEUE_ID_NIC_14_1 = 213, + GAUDI2_QUEUE_ID_NIC_14_2 = 214, + GAUDI2_QUEUE_ID_NIC_14_3 = 215, + GAUDI2_QUEUE_ID_NIC_15_0 = 216, + GAUDI2_QUEUE_ID_NIC_15_1 = 217, + GAUDI2_QUEUE_ID_NIC_15_2 = 218, + GAUDI2_QUEUE_ID_NIC_15_3 = 219, + GAUDI2_QUEUE_ID_NIC_16_0 = 220, + GAUDI2_QUEUE_ID_NIC_16_1 = 221, + GAUDI2_QUEUE_ID_NIC_16_2 = 222, + GAUDI2_QUEUE_ID_NIC_16_3 = 223, + GAUDI2_QUEUE_ID_NIC_17_0 = 224, + GAUDI2_QUEUE_ID_NIC_17_1 = 225, + GAUDI2_QUEUE_ID_NIC_17_2 = 226, + GAUDI2_QUEUE_ID_NIC_17_3 = 227, + GAUDI2_QUEUE_ID_NIC_18_0 = 228, + GAUDI2_QUEUE_ID_NIC_18_1 = 229, + GAUDI2_QUEUE_ID_NIC_18_2 = 230, + GAUDI2_QUEUE_ID_NIC_18_3 = 231, + GAUDI2_QUEUE_ID_NIC_19_0 = 232, + GAUDI2_QUEUE_ID_NIC_19_1 = 233, + GAUDI2_QUEUE_ID_NIC_19_2 = 234, + GAUDI2_QUEUE_ID_NIC_19_3 = 235, + GAUDI2_QUEUE_ID_NIC_20_0 = 236, + GAUDI2_QUEUE_ID_NIC_20_1 = 237, + GAUDI2_QUEUE_ID_NIC_20_2 = 238, + GAUDI2_QUEUE_ID_NIC_20_3 = 239, + GAUDI2_QUEUE_ID_NIC_21_0 = 240, + GAUDI2_QUEUE_ID_NIC_21_1 = 241, + GAUDI2_QUEUE_ID_NIC_21_2 = 242, + GAUDI2_QUEUE_ID_NIC_21_3 = 243, + GAUDI2_QUEUE_ID_NIC_22_0 = 244, + GAUDI2_QUEUE_ID_NIC_22_1 = 245, + GAUDI2_QUEUE_ID_NIC_22_2 = 246, + GAUDI2_QUEUE_ID_NIC_22_3 = 247, + GAUDI2_QUEUE_ID_NIC_23_0 = 248, + GAUDI2_QUEUE_ID_NIC_23_1 = 249, + GAUDI2_QUEUE_ID_NIC_23_2 = 250, + GAUDI2_QUEUE_ID_NIC_23_3 = 251, + GAUDI2_QUEUE_ID_ROT_0_0 = 252, + GAUDI2_QUEUE_ID_ROT_0_1 = 253, + GAUDI2_QUEUE_ID_ROT_0_2 = 254, + GAUDI2_QUEUE_ID_ROT_0_3 = 255, + GAUDI2_QUEUE_ID_ROT_1_0 = 256, + GAUDI2_QUEUE_ID_ROT_1_1 = 257, + GAUDI2_QUEUE_ID_ROT_1_2 = 258, + GAUDI2_QUEUE_ID_ROT_1_3 = 259, + GAUDI2_QUEUE_ID_CPU_PQ = 260, + GAUDI2_QUEUE_ID_SIZE +}; + +/* + * Engine Numbering + * + * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle' + */ + +enum goya_engine_id { + GOYA_ENGINE_ID_DMA_0 = 0, + GOYA_ENGINE_ID_DMA_1, + GOYA_ENGINE_ID_DMA_2, + GOYA_ENGINE_ID_DMA_3, + GOYA_ENGINE_ID_DMA_4, + GOYA_ENGINE_ID_MME_0, + GOYA_ENGINE_ID_TPC_0, + GOYA_ENGINE_ID_TPC_1, + GOYA_ENGINE_ID_TPC_2, + GOYA_ENGINE_ID_TPC_3, + GOYA_ENGINE_ID_TPC_4, + GOYA_ENGINE_ID_TPC_5, + GOYA_ENGINE_ID_TPC_6, + GOYA_ENGINE_ID_TPC_7, + GOYA_ENGINE_ID_SIZE +}; + +enum gaudi_engine_id { + GAUDI_ENGINE_ID_DMA_0 = 0, + GAUDI_ENGINE_ID_DMA_1, + GAUDI_ENGINE_ID_DMA_2, + GAUDI_ENGINE_ID_DMA_3, + GAUDI_ENGINE_ID_DMA_4, + GAUDI_ENGINE_ID_DMA_5, + GAUDI_ENGINE_ID_DMA_6, + GAUDI_ENGINE_ID_DMA_7, + GAUDI_ENGINE_ID_MME_0, + GAUDI_ENGINE_ID_MME_1, + GAUDI_ENGINE_ID_MME_2, + GAUDI_ENGINE_ID_MME_3, + GAUDI_ENGINE_ID_TPC_0, + GAUDI_ENGINE_ID_TPC_1, + GAUDI_ENGINE_ID_TPC_2, + GAUDI_ENGINE_ID_TPC_3, + GAUDI_ENGINE_ID_TPC_4, + GAUDI_ENGINE_ID_TPC_5, + GAUDI_ENGINE_ID_TPC_6, + GAUDI_ENGINE_ID_TPC_7, + GAUDI_ENGINE_ID_NIC_0, + GAUDI_ENGINE_ID_NIC_1, + GAUDI_ENGINE_ID_NIC_2, + GAUDI_ENGINE_ID_NIC_3, + GAUDI_ENGINE_ID_NIC_4, + GAUDI_ENGINE_ID_NIC_5, + GAUDI_ENGINE_ID_NIC_6, + GAUDI_ENGINE_ID_NIC_7, + GAUDI_ENGINE_ID_NIC_8, + GAUDI_ENGINE_ID_NIC_9, + GAUDI_ENGINE_ID_SIZE +}; + +enum gaudi2_engine_id { + GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, + GAUDI2_DCORE0_ENGINE_ID_EDMA_1, + GAUDI2_DCORE0_ENGINE_ID_MME, + GAUDI2_DCORE0_ENGINE_ID_TPC_0, + GAUDI2_DCORE0_ENGINE_ID_TPC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_2, + GAUDI2_DCORE0_ENGINE_ID_TPC_3, + GAUDI2_DCORE0_ENGINE_ID_TPC_4, + GAUDI2_DCORE0_ENGINE_ID_TPC_5, + GAUDI2_DCORE0_ENGINE_ID_DEC_0, + GAUDI2_DCORE0_ENGINE_ID_DEC_1, + GAUDI2_DCORE1_ENGINE_ID_EDMA_0, + GAUDI2_DCORE1_ENGINE_ID_EDMA_1, + GAUDI2_DCORE1_ENGINE_ID_MME, + GAUDI2_DCORE1_ENGINE_ID_TPC_0, + GAUDI2_DCORE1_ENGINE_ID_TPC_1, + GAUDI2_DCORE1_ENGINE_ID_TPC_2, + GAUDI2_DCORE1_ENGINE_ID_TPC_3, + GAUDI2_DCORE1_ENGINE_ID_TPC_4, + GAUDI2_DCORE1_ENGINE_ID_TPC_5, + GAUDI2_DCORE1_ENGINE_ID_DEC_0, + GAUDI2_DCORE1_ENGINE_ID_DEC_1, + GAUDI2_DCORE2_ENGINE_ID_EDMA_0, + GAUDI2_DCORE2_ENGINE_ID_EDMA_1, + GAUDI2_DCORE2_ENGINE_ID_MME, + GAUDI2_DCORE2_ENGINE_ID_TPC_0, + GAUDI2_DCORE2_ENGINE_ID_TPC_1, + GAUDI2_DCORE2_ENGINE_ID_TPC_2, + GAUDI2_DCORE2_ENGINE_ID_TPC_3, + GAUDI2_DCORE2_ENGINE_ID_TPC_4, + GAUDI2_DCORE2_ENGINE_ID_TPC_5, + GAUDI2_DCORE2_ENGINE_ID_DEC_0, + GAUDI2_DCORE2_ENGINE_ID_DEC_1, + GAUDI2_DCORE3_ENGINE_ID_EDMA_0, + GAUDI2_DCORE3_ENGINE_ID_EDMA_1, + GAUDI2_DCORE3_ENGINE_ID_MME, + GAUDI2_DCORE3_ENGINE_ID_TPC_0, + GAUDI2_DCORE3_ENGINE_ID_TPC_1, + GAUDI2_DCORE3_ENGINE_ID_TPC_2, + GAUDI2_DCORE3_ENGINE_ID_TPC_3, + GAUDI2_DCORE3_ENGINE_ID_TPC_4, + GAUDI2_DCORE3_ENGINE_ID_TPC_5, + GAUDI2_DCORE3_ENGINE_ID_DEC_0, + GAUDI2_DCORE3_ENGINE_ID_DEC_1, + GAUDI2_DCORE0_ENGINE_ID_TPC_6, + GAUDI2_ENGINE_ID_PDMA_0, + GAUDI2_ENGINE_ID_PDMA_1, + GAUDI2_ENGINE_ID_ROT_0, + GAUDI2_ENGINE_ID_ROT_1, + GAUDI2_PCIE_ENGINE_ID_DEC_0, + GAUDI2_PCIE_ENGINE_ID_DEC_1, + GAUDI2_ENGINE_ID_NIC0_0, + GAUDI2_ENGINE_ID_NIC0_1, + GAUDI2_ENGINE_ID_NIC1_0, + GAUDI2_ENGINE_ID_NIC1_1, + GAUDI2_ENGINE_ID_NIC2_0, + GAUDI2_ENGINE_ID_NIC2_1, + GAUDI2_ENGINE_ID_NIC3_0, + GAUDI2_ENGINE_ID_NIC3_1, + GAUDI2_ENGINE_ID_NIC4_0, + GAUDI2_ENGINE_ID_NIC4_1, + GAUDI2_ENGINE_ID_NIC5_0, + GAUDI2_ENGINE_ID_NIC5_1, + GAUDI2_ENGINE_ID_NIC6_0, + GAUDI2_ENGINE_ID_NIC6_1, + GAUDI2_ENGINE_ID_NIC7_0, + GAUDI2_ENGINE_ID_NIC7_1, + GAUDI2_ENGINE_ID_NIC8_0, + GAUDI2_ENGINE_ID_NIC8_1, + GAUDI2_ENGINE_ID_NIC9_0, + GAUDI2_ENGINE_ID_NIC9_1, + GAUDI2_ENGINE_ID_NIC10_0, + GAUDI2_ENGINE_ID_NIC10_1, + GAUDI2_ENGINE_ID_NIC11_0, + GAUDI2_ENGINE_ID_NIC11_1, + GAUDI2_ENGINE_ID_PCIE, + GAUDI2_ENGINE_ID_PSOC, + GAUDI2_ENGINE_ID_ARC_FARM, + GAUDI2_ENGINE_ID_KDMA, + GAUDI2_ENGINE_ID_SIZE +}; + +/* + * ASIC specific PLL index + * + * Used to retrieve in frequency info of different IPs via + * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be + * used as an index in struct hl_pll_frequency_info + */ + +enum hl_goya_pll_index { + HL_GOYA_CPU_PLL = 0, + HL_GOYA_IC_PLL, + HL_GOYA_MC_PLL, + HL_GOYA_MME_PLL, + HL_GOYA_PCI_PLL, + HL_GOYA_EMMC_PLL, + HL_GOYA_TPC_PLL, + HL_GOYA_PLL_MAX +}; + +enum hl_gaudi_pll_index { + HL_GAUDI_CPU_PLL = 0, + HL_GAUDI_PCI_PLL, + HL_GAUDI_SRAM_PLL, + HL_GAUDI_HBM_PLL, + HL_GAUDI_NIC_PLL, + HL_GAUDI_DMA_PLL, + HL_GAUDI_MESH_PLL, + HL_GAUDI_MME_PLL, + HL_GAUDI_TPC_PLL, + HL_GAUDI_IF_PLL, + HL_GAUDI_PLL_MAX +}; + +enum hl_gaudi2_pll_index { + HL_GAUDI2_CPU_PLL = 0, + HL_GAUDI2_PCI_PLL, + HL_GAUDI2_SRAM_PLL, + HL_GAUDI2_HBM_PLL, + HL_GAUDI2_NIC_PLL, + HL_GAUDI2_DMA_PLL, + HL_GAUDI2_MESH_PLL, + HL_GAUDI2_MME_PLL, + HL_GAUDI2_TPC_PLL, + HL_GAUDI2_IF_PLL, + HL_GAUDI2_VID_PLL, + HL_GAUDI2_MSS_PLL, + HL_GAUDI2_PLL_MAX +}; + +/** + * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is + * submitted to the GOYA's DMA QMAN. This attribute is not relevant + * to the H/W but the kernel driver use it to parse the packet's + * addresses and patch/validate them. + * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR. + * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM. + * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM. + * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR. + * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory. + * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory. + * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR. + * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM. + * @HL_DMA_ENUM_MAX: number of values in enum + */ +enum hl_goya_dma_direction { + HL_DMA_HOST_TO_DRAM, + HL_DMA_HOST_TO_SRAM, + HL_DMA_DRAM_TO_SRAM, + HL_DMA_SRAM_TO_DRAM, + HL_DMA_SRAM_TO_HOST, + HL_DMA_DRAM_TO_HOST, + HL_DMA_DRAM_TO_DRAM, + HL_DMA_SRAM_TO_SRAM, + HL_DMA_ENUM_MAX +}; + +/** + * enum hl_device_status - Device status information. + * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. + * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset. + * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. + * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. + * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in + * progress. + * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently during reset that was + * triggered because the user released the device + * @HL_DEVICE_STATUS_LAST: Last status. + */ +enum hl_device_status { + HL_DEVICE_STATUS_OPERATIONAL, + HL_DEVICE_STATUS_IN_RESET, + HL_DEVICE_STATUS_MALFUNCTION, + HL_DEVICE_STATUS_NEEDS_RESET, + HL_DEVICE_STATUS_IN_DEVICE_CREATION, + HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE, + HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE +}; + +enum hl_server_type { + HL_SERVER_TYPE_UNKNOWN = 0, + HL_SERVER_GAUDI_HLS1 = 1, + HL_SERVER_GAUDI_HLS1H = 2, + HL_SERVER_GAUDI_TYPE1 = 3, + HL_SERVER_GAUDI_TYPE2 = 4, + HL_SERVER_GAUDI2_HLS2 = 5 +}; + +/* + * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command + * + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable + * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state + * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error + * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened + * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened + */ +#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) +#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) +#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) +#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) +#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) +#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) +#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) +#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) +#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) + +/* Opcode for management ioctl + * + * HW_IP_INFO - Receive information about different IP blocks in the + * device. + * HL_INFO_HW_EVENTS - Receive an array describing how many times each event + * occurred since the last hard reset. + * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the + * specific context. This is relevant only for devices + * where the dram is managed by the kernel driver + * HL_INFO_HW_IDLE - Retrieve information about the idle status of each + * internal engine. + * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't + * require an open context. + * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device + * over the last period specified by the user. + * The period can be between 100ms to 1s, in + * resolution of 100ms. The return value is a + * percentage of the utilization rate. + * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each + * event occurred since the driver was loaded. + * HL_INFO_CLK_RATE - Retrieve the current and maximum clock rate + * of the device in MHz. The maximum clock rate is + * configurable via sysfs parameter + * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset + * operations performed on the device since the last + * time the driver was loaded. + * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time + * for synchronization. + * HL_INFO_CS_COUNTERS - Retrieve command submission counters + * HL_INFO_PCI_COUNTERS - Retrieve PCI counters + * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason + * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore + * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption + * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency + * HL_INFO_POWER - Retrieve power information + * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls + * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info + * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num + * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened + * and CS timeout or razwi error occurred. + * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. + * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: + * Timestamp of razwi. + * The address which accessing it caused the razwi. + * Razwi initiator. + * Razwi cause, was it a page fault or MMU access error. + * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation + * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. + * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. + * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd + * HL_INFO_GET_EVENTS - Retrieve the last occurred events + * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. + * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. + * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. + * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. + * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. + */ +#define HL_INFO_HW_IP_INFO 0 +#define HL_INFO_HW_EVENTS 1 +#define HL_INFO_DRAM_USAGE 2 +#define HL_INFO_HW_IDLE 3 +#define HL_INFO_DEVICE_STATUS 4 +#define HL_INFO_DEVICE_UTILIZATION 6 +#define HL_INFO_HW_EVENTS_AGGREGATE 7 +#define HL_INFO_CLK_RATE 8 +#define HL_INFO_RESET_COUNT 9 +#define HL_INFO_TIME_SYNC 10 +#define HL_INFO_CS_COUNTERS 11 +#define HL_INFO_PCI_COUNTERS 12 +#define HL_INFO_CLK_THROTTLE_REASON 13 +#define HL_INFO_SYNC_MANAGER 14 +#define HL_INFO_TOTAL_ENERGY 15 +#define HL_INFO_PLL_FREQUENCY 16 +#define HL_INFO_POWER 17 +#define HL_INFO_OPEN_STATS 18 +#define HL_INFO_DRAM_REPLACED_ROWS 21 +#define HL_INFO_DRAM_PENDING_ROWS 22 +#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 +#define HL_INFO_CS_TIMEOUT_EVENT 24 +#define HL_INFO_RAZWI_EVENT 25 +#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 +#define HL_INFO_SECURED_ATTESTATION 27 +#define HL_INFO_REGISTER_EVENTFD 28 +#define HL_INFO_UNREGISTER_EVENTFD 29 +#define HL_INFO_GET_EVENTS 30 +#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 +#define HL_INFO_ENGINE_STATUS 32 +#define HL_INFO_PAGE_FAULT_EVENT 33 +#define HL_INFO_USER_MAPPINGS 34 +#define HL_INFO_FW_GENERIC_REQ 35 + +#define HL_INFO_VERSION_MAX_LEN 128 +#define HL_INFO_CARD_NAME_MAX_LEN 16 + +/* Maximum buffer size for retrieving engines status */ +#define HL_ENGINES_DATA_MAX_SIZE SZ_1M + +/** + * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC + * @sram_base_address: The first SRAM physical base address that is free to be + * used by the user. + * @dram_base_address: The first DRAM virtual or physical base address that is + * free to be used by the user. + * @dram_size: The DRAM size that is available to the user. + * @sram_size: The SRAM size that is available to the user. + * @num_of_events: The number of events that can be received from the f/w. This + * is needed so the user can what is the size of the h/w events + * array he needs to pass to the kernel when he wants to fetch + * the event counters. + * @device_id: PCI device ID of the ASIC. + * @module_id: Module ID of the ASIC for mezzanine cards in servers + * (From OCP spec). + * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. + * @first_available_interrupt_id: The first available interrupt ID for the user + * to be used when it works with user interrupts. + * Relevant for Gaudi2 and later. + * @server_type: Server type that the Gaudi ASIC is currently installed in. + * The value is according to enum hl_server_type + * @cpld_version: CPLD version on the board. + * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. + * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler + * in some ASICs. + * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant + * for Goya/Gaudi only. + * @dram_enabled: Whether the DRAM is enabled. + * @security_enabled: Whether security is enabled on device. + * @mme_master_slave_mode: Indicate whether the MME is working in master/slave + * configuration. Relevant for Greco and later. + * @cpucp_version: The CPUCP f/w version. + * @card_name: The card name as passed by the f/w. + * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. + * Relevant for Greco and later. + * @dram_page_size: The DRAM physical page size. + * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. + * Relevant for Gaudi2 and later. + * @number_of_user_interrupts: The number of interrupts that are available to the userspace + * application to use. Relevant for Gaudi2 and later. + * @device_mem_alloc_default_page_size: default page size used in device memory allocation. + * @revision_id: PCI revision ID of the ASIC. + */ +struct hl_info_hw_ip_info { + __u64 sram_base_address; + __u64 dram_base_address; + __u64 dram_size; + __u32 sram_size; + __u32 num_of_events; + __u32 device_id; + __u32 module_id; + __u32 decoder_enabled_mask; + __u16 first_available_interrupt_id; + __u16 server_type; + __u32 cpld_version; + __u32 psoc_pci_pll_nr; + __u32 psoc_pci_pll_nf; + __u32 psoc_pci_pll_od; + __u32 psoc_pci_pll_div_factor; + __u8 tpc_enabled_mask; + __u8 dram_enabled; + __u8 security_enabled; + __u8 mme_master_slave_mode; + __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; + __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; + __u64 tpc_enabled_mask_ext; + __u64 dram_page_size; + __u32 edma_enabled_mask; + __u16 number_of_user_interrupts; + __u16 pad2; + __u64 reserved4; + __u64 device_mem_alloc_default_page_size; + __u64 reserved5; + __u64 reserved6; + __u32 reserved7; + __u8 reserved8; + __u8 revision_id; + __u8 pad[2]; +}; + +struct hl_info_dram_usage { + __u64 dram_free_mem; + __u64 ctx_dram_mem; +}; + +#define HL_BUSY_ENGINES_MASK_EXT_SIZE 4 + +struct hl_info_hw_idle { + __u32 is_idle; + /* + * Bitmask of busy engines. + * Bits definition is according to `enum _engine_id'. + */ + __u32 busy_engines_mask; + + /* + * Extended Bitmask of busy engines. + * Bits definition is according to `enum _engine_id'. + */ + __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; +}; + +struct hl_info_device_status { + __u32 status; + __u32 pad; +}; + +struct hl_info_device_utilization { + __u32 utilization; + __u32 pad; +}; + +struct hl_info_clk_rate { + __u32 cur_clk_rate_mhz; + __u32 max_clk_rate_mhz; +}; + +struct hl_info_reset_count { + __u32 hard_reset_cnt; + __u32 soft_reset_cnt; +}; + +struct hl_info_time_sync { + __u64 device_time; + __u64 host_time; +}; + +/** + * struct hl_info_pci_counters - pci counters + * @rx_throughput: PCI rx throughput KBps + * @tx_throughput: PCI tx throughput KBps + * @replay_cnt: PCI replay counter + */ +struct hl_info_pci_counters { + __u64 rx_throughput; + __u64 tx_throughput; + __u64 replay_cnt; +}; + +enum hl_clk_throttling_type { + HL_CLK_THROTTLE_TYPE_POWER, + HL_CLK_THROTTLE_TYPE_THERMAL, + HL_CLK_THROTTLE_TYPE_MAX +}; + +/* clk_throttling_reason masks */ +#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) +#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) + +/** + * struct hl_info_clk_throttle - clock throttling reason + * @clk_throttling_reason: each bit represents a clk throttling reason + * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event + * @clk_throttling_duration_ns: the clock throttle time in nanosec + */ +struct hl_info_clk_throttle { + __u32 clk_throttling_reason; + __u32 pad; + __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; + __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; +}; + +/** + * struct hl_info_energy - device energy information + * @total_energy_consumption: total device energy consumption + */ +struct hl_info_energy { + __u64 total_energy_consumption; +}; + +#define HL_PLL_NUM_OUTPUTS 4 + +struct hl_pll_frequency_info { + __u16 output[HL_PLL_NUM_OUTPUTS]; +}; + +/** + * struct hl_open_stats_info - device open statistics information + * @open_counter: ever growing counter, increased on each successful dev open + * @last_open_period_ms: duration (ms) device was open last time + * @is_compute_ctx_active: Whether there is an active compute context executing + * @compute_ctx_in_release: true if the current compute context is being released + */ +struct hl_open_stats_info { + __u64 open_counter; + __u64 last_open_period_ms; + __u8 is_compute_ctx_active; + __u8 compute_ctx_in_release; + __u8 pad[6]; +}; + +/** + * struct hl_power_info - power information + * @power: power consumption + */ +struct hl_power_info { + __u64 power; +}; + +/** + * struct hl_info_sync_manager - sync manager information + * @first_available_sync_object: first available sob + * @first_available_monitor: first available monitor + * @first_available_cq: first available cq + */ +struct hl_info_sync_manager { + __u32 first_available_sync_object; + __u32 first_available_monitor; + __u32 first_available_cq; + __u32 reserved; +}; + +/** + * struct hl_info_cs_counters - command submission counters + * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue + * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue + * @total_parsing_drop_cnt: total dropped due to error in packet parsing + * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing + * @total_queue_full_drop_cnt: total dropped due to queue full + * @ctx_queue_full_drop_cnt: context dropped due to queue full + * @total_device_in_reset_drop_cnt: total dropped due to device in reset + * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset + * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight + * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight + * @total_validation_drop_cnt: total dropped due to validation error + * @ctx_validation_drop_cnt: context dropped due to validation error + */ +struct hl_info_cs_counters { + __u64 total_out_of_mem_drop_cnt; + __u64 ctx_out_of_mem_drop_cnt; + __u64 total_parsing_drop_cnt; + __u64 ctx_parsing_drop_cnt; + __u64 total_queue_full_drop_cnt; + __u64 ctx_queue_full_drop_cnt; + __u64 total_device_in_reset_drop_cnt; + __u64 ctx_device_in_reset_drop_cnt; + __u64 total_max_cs_in_flight_drop_cnt; + __u64 ctx_max_cs_in_flight_drop_cnt; + __u64 total_validation_drop_cnt; + __u64 ctx_validation_drop_cnt; +}; + +/** + * struct hl_info_last_err_open_dev_time - last error boot information. + * @timestamp: timestamp of last time the device was opened and error occurred. + */ +struct hl_info_last_err_open_dev_time { + __s64 timestamp; +}; + +/** + * struct hl_info_cs_timeout_event - last CS timeout information. + * @timestamp: timestamp when last CS timeout event occurred. + * @seq: sequence number of last CS timeout event. + */ +struct hl_info_cs_timeout_event { + __s64 timestamp; + __u64 seq; +}; + +#define HL_RAZWI_NA_ENG_ID U16_MAX +#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128 +#define HL_RAZWI_READ BIT(0) +#define HL_RAZWI_WRITE BIT(1) +#define HL_RAZWI_LBW BIT(2) +#define HL_RAZWI_HBW BIT(3) +#define HL_RAZWI_RR BIT(4) +#define HL_RAZWI_ADDR_DEC BIT(5) + +/** + * struct hl_info_razwi_event - razwi information. + * @timestamp: timestamp of razwi. + * @addr: address which accessing it caused razwi. + * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not + * have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible + * engines which caused the razwi, it will hold all of them. + * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication + * might be common for several engines and there is no way to get the + * exact engine. In this way, engine_id array will be filled with all + * possible engines caused this razwi. Also, there might be possibility + * in gaudi, where we don't indication on specific engine, in that case + * the value of this parameter will be zero. + * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation + * HL_RAZWI_WRITE - razwi caused by write operation + * HL_RAZWI_LBW - razwi caused by lbw fabric transaction + * HL_RAZWI_HBW - razwi caused by hbw fabric transaction + * HL_RAZWI_RR - razwi caused by range register + * HL_RAZWI_ADDR_DEC - razwi caused by address decode error + * Note: this data is not supported by all asics, in that case the relevant bits will not + * be set. + */ +struct hl_info_razwi_event { + __s64 timestamp; + __u64 addr; + __u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR]; + __u16 num_of_possible_engines; + __u8 flags; + __u8 pad[5]; +}; + +#define MAX_QMAN_STREAMS_INFO 4 +#define OPCODE_INFO_MAX_ADDR_SIZE 8 +/** + * struct hl_info_undefined_opcode_event - info about last undefined opcode error + * @timestamp: timestamp of the undefined opcode error + * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ + * entries. In case all streams array entries are + * filled with values, it means the execution was in Lower-CP. + * @cq_addr: the address of the current handled command buffer + * @cq_size: the size of the current handled command buffer + * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. + * should be equal to 1 in case of undefined opcode + * in Upper-CP (specific stream) and equal to 4 incase + * of undefined opcode in Lower-CP. + * @engine_id: engine-id that the error occurred on + * @stream_id: the stream id the error occurred on. In case the stream equals to + * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. + */ +struct hl_info_undefined_opcode_event { + __s64 timestamp; + __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; + __u64 cq_addr; + __u32 cq_size; + __u32 cb_addr_streams_len; + __u32 engine_id; + __u32 stream_id; +}; + +/** + * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information. + * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size + * (e.g. 0x2100000 means that 1MB and 32MB pages are supported). + */ +struct hl_info_dev_memalloc_page_sizes { + __u64 page_order_bitmask; +}; + +#define SEC_PCR_DATA_BUF_SZ 256 +#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ +#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ +#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ + +/* + * struct hl_info_sec_attest - attestation report of the boot + * @nonce: number only used once. random number provided by host. this also passed to the quote + * command as a qualifying data. + * @pcr_quote_len: length of the attestation quote data (bytes) + * @pub_data_len: length of the public data (bytes) + * @certificate_len: length of the certificate (bytes) + * @pcr_num_reg: number of PCR registers in the pcr_data array + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) + * @quote_sig_len: length of the attestation report signature (bytes) + * @pcr_data: raw values of the PCR registers + * @pcr_quote: attestation report data structure + * @quote_sig: signature structure of the attestation report + * @public_data: public key for the signed attestation + * (outPublic + name + qualifiedName) + * @certificate: certificate for the attestation signing key + */ +struct hl_info_sec_attest { + __u32 nonce; + __u16 pcr_quote_len; + __u16 pub_data_len; + __u16 certificate_len; + __u8 pcr_num_reg; + __u8 pcr_reg_len; + __u8 quote_sig_len; + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; + __u8 pad0[2]; +}; + +/** + * struct hl_page_fault_info - page fault information. + * @timestamp: timestamp of page fault. + * @addr: address which accessing it caused page fault. + * @engine_id: engine id which caused the page fault, supported only in gaudi3. + */ +struct hl_page_fault_info { + __s64 timestamp; + __u64 addr; + __u16 engine_id; + __u8 pad[6]; +}; + +/** + * struct hl_user_mapping - user mapping information. + * @dev_va: device virtual address. + * @size: virtual address mapping size. + */ +struct hl_user_mapping { + __u64 dev_va; + __u64 size; +}; + +enum gaudi_dcores { + HL_GAUDI_WS_DCORE, + HL_GAUDI_WN_DCORE, + HL_GAUDI_EN_DCORE, + HL_GAUDI_ES_DCORE +}; + +/** + * struct hl_info_args - Main structure to retrieve device related information. + * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation + * mentioned in @op. + * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it + * limits how many bytes the kernel can write. For hw_events array, the size should be + * hl_info_hw_ip_info.num_of_events * sizeof(__u32). + * @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details. + * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores). + * @ctx_id: Context ID of the user. Currently not in use. + * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms + * resolution. Currently not in use. + * @pll_index: Index as defined in hl__pll_index enumeration. + * @eventfd: event file descriptor for event notifications. + * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the + * driver. It is possible for the user to allocate buffer larger than + * needed, hence updating this variable so user will know the exact amount + * of bytes copied by the kernel to the buffer. + * @sec_attest_nonce: Nonce number used for attestation report. + * @array_size: Number of array members copied to user buffer. + * Relevant for HL_INFO_USER_MAPPINGS info ioctl. + * @fw_sub_opcode: generic requests sub opcodes. + * @pad: Padding to 64 bit. + */ +struct hl_info_args { + __u64 return_pointer; + __u32 return_size; + __u32 op; + + union { + __u32 dcore_id; + __u32 ctx_id; + __u32 period_ms; + __u32 pll_index; + __u32 eventfd; + __u32 user_buffer_actual_size; + __u32 sec_attest_nonce; + __u32 array_size; + __u32 fw_sub_opcode; + }; + + __u32 pad; +}; + +/* Opcode to create a new command buffer */ +#define HL_CB_OP_CREATE 0 +/* Opcode to destroy previously created command buffer */ +#define HL_CB_OP_DESTROY 1 +/* Opcode to retrieve information about a command buffer */ +#define HL_CB_OP_INFO 2 + +/* 2MB minus 32 bytes for 2xMSG_PROT */ +#define HL_MAX_CB_SIZE (0x200000 - 32) + +/* Indicates whether the command buffer should be mapped to the device's MMU */ +#define HL_CB_FLAGS_MAP 0x1 + +/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ +#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 + +struct hl_cb_in { + /* Handle of CB or 0 if we want to create one */ + __u64 cb_handle; + /* HL_CB_OP_* */ + __u32 op; + + /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that + * will be allocated, regardless of this parameter's value, is PAGE_SIZE + */ + __u32 cb_size; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + /* HL_CB_FLAGS_* */ + __u32 flags; +}; + +struct hl_cb_out { + union { + /* Handle of CB */ + __u64 cb_handle; + + union { + /* Information about CB */ + struct { + /* Usage count of CB */ + __u32 usage_cnt; + __u32 pad; + }; + + /* CB mapped address to device MMU */ + __u64 device_va; + }; + }; +}; + +union hl_cb_args { + struct hl_cb_in in; + struct hl_cb_out out; +}; + +/* HL_CS_CHUNK_FLAGS_ values + * + * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: + * Indicates if the CB was allocated and mapped by userspace + * (relevant to greco and above). User allocated CB is a command buffer, + * allocated by the user, via malloc (or similar). After allocating the + * CB, the user invokes - “memory ioctl” to map the user memory into a + * device virtual address. The user provides this address via the + * cb_handle field. The interface provides the ability to create a + * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it + * increases the PCI-DMA queues throughput. This CB allocation method + * also reduces the use of Linux DMA-able memory pool. Which are limited + * and used by other Linux sub-systems. + */ +#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 + +/* + * This structure size must always be fixed to 64-bytes for backward + * compatibility + */ +struct hl_cs_chunk { + union { + /* Goya/Gaudi: + * For external queue, this represents a Handle of CB on the + * Host. + * For internal queue in Goya, this represents an SRAM or + * a DRAM address of the internal CB. In Gaudi, this might also + * represent a mapped host address of the CB. + * + * Greco onwards: + * For H/W queue, this represents either a Handle of CB on the + * Host, or an SRAM, a DRAM, or a mapped host address of the CB. + * + * A mapped host address is in the device address space, after + * a host address was mapped by the device MMU. + */ + __u64 cb_handle; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * This holds address of array of u64 values that contain + * signal CS sequence numbers. The wait described by + * this job will listen on all those signals + * (wait event per signal) + */ + __u64 signal_seq_arr; + + /* + * Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set + * along with HL_CS_FLAGS_ENCAP_SIGNALS. + * This is the CS sequence which has the encapsulated signals. + */ + __u64 encaps_signal_seq; + }; + + /* Index of queue to put the CB on */ + __u32 queue_index; + + union { + /* + * Size of command buffer with valid packets + * Can be smaller then actual CB size + */ + __u32 cb_size; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * Number of entries in signal_seq_arr + */ + __u32 num_signal_seq_arr; + + /* Relevant only when HL_CS_FLAGS_WAIT or + * HL_CS_FLAGS_COLLECTIVE_WAIT is set along + * with HL_CS_FLAGS_ENCAP_SIGNALS + * This set the signals range that the user want to wait for + * out of the whole reserved signals range. + * e.g if the signals range is 20, and user don't want + * to wait for signal 8, so he set this offset to 7, then + * he call the API again with 9 and so on till 20. + */ + __u32 encaps_signal_offset; + }; + + /* HL_CS_CHUNK_FLAGS_* */ + __u32 cs_chunk_flags; + + /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. + * This holds the collective engine ID. The wait described by this job + * will sync with this engine and with all NICs before completion. + */ + __u32 collective_engine_id; + + /* Align structure to 64 bytes */ + __u32 pad[10]; +}; + +/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ +#define HL_CS_FLAGS_FORCE_RESTORE 0x1 +#define HL_CS_FLAGS_SIGNAL 0x2 +#define HL_CS_FLAGS_WAIT 0x4 +#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 + +#define HL_CS_FLAGS_TIMESTAMP 0x20 +#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 +#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 +#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 +#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 +#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 + +/* + * The encapsulated signals CS is merged into the existing CS ioctls. + * In order to use this feature need to follow the below procedure: + * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY + * the output of this API will be the SOB offset from CFG_BASE. + * this address will be used to patch CB cmds to do the signaling for this + * SOB by incrementing it's value. + * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY + * CS type, note that this might fail if out-of-sync happened to the SOB + * value, in case other signaling request to the same SOB occurred between + * reserve-unreserve calls. + * 2. Use the staged CS to do the encapsulated signaling jobs. + * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset + * field. This offset allows app to wait on part of the reserved signals. + * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag + * to wait for the encapsulated signals. + */ +#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 +#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 +#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 + +/* + * The engine cores CS is merged into the existing CS ioctls. + * Use it to control the engine cores mode. + */ +#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 + +#define HL_CS_STATUS_SUCCESS 0 + +#define HL_MAX_JOBS_PER_CS 512 + +/* HL_ENGINE_CORE_ values + * + * HL_ENGINE_CORE_HALT: engine core halt + * HL_ENGINE_CORE_RUN: engine core run + */ +#define HL_ENGINE_CORE_HALT (1 << 0) +#define HL_ENGINE_CORE_RUN (1 << 1) + +struct hl_cs_in { + + union { + struct { + /* this holds address of array of hl_cs_chunk for restore phase */ + __u64 chunks_restore; + + /* holds address of array of hl_cs_chunk for execution phase */ + __u64 chunks_execute; + }; + + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ + struct { + /* this holds address of array of uint32 for engine_cores */ + __u64 engine_cores; + + /* number of engine cores in engine_cores array */ + __u32 num_engine_cores; + + /* the core command to be sent towards engine cores */ + __u32 core_command; + }; + }; + + union { + /* + * Sequence number of a staged submission CS + * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. + */ + __u64 seq; + + /* + * Encapsulated signals handle id + * Valid for two flows: + * 1. CS with encapsulated signals: + * when HL_CS_FLAGS_STAGED_SUBMISSION and + * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST + * and HL_CS_FLAGS_ENCAP_SIGNALS are set. + * 2. unreserve signals: + * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. + */ + __u32 encaps_sig_handle_id; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* Encapsulated signals number */ + __u32 encaps_signals_count; + + /* Encapsulated signals queue index (stream) */ + __u32 encaps_signals_q_idx; + }; + }; + + /* Number of chunks in restore phase array. Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_restore; + + /* Number of chunks in execution array. Maximum number is + * HL_MAX_JOBS_PER_CS + */ + __u32 num_chunks_execute; + + /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT + * is set + */ + __u32 timeout; + + /* HL_CS_FLAGS_* */ + __u32 cs_flags; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + __u8 pad[4]; +}; + +struct hl_cs_out { + union { + /* + * seq holds the sequence number of the CS to pass to wait + * ioctl. All values are valid except for 0 and ULLONG_MAX + */ + __u64 seq; + + /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ + struct { + /* This is the reserved signal handle id */ + __u32 handle_id; + + /* This is the signals count */ + __u32 count; + }; + }; + + /* HL_CS_STATUS */ + __u32 status; + + /* + * SOB base address offset + * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set + */ + __u32 sob_base_addr_offset; + + /* + * Count of completed signals in SOB before current signal submission. + * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) + * or HL_CS_FLAGS_SIGNAL is set + */ + __u16 sob_count_before_submission; + __u16 pad[3]; +}; + +union hl_cs_args { + struct hl_cs_in in; + struct hl_cs_out out; +}; + +#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 +#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 +#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT 0xFFE00000 +#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 +#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 +#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 + +#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 + +struct hl_wait_cs_in { + union { + struct { + /* + * In case of wait_cs holds the CS sequence number. + * In case of wait for multi CS hold a user pointer to + * an array of CS sequence numbers + */ + __u64 seq; + /* Absolute timeout to wait for command submission + * in microseconds + */ + __u64 timeout_us; + }; + + struct { + union { + /* User address for completion comparison. + * upon interrupt, driver will compare the value pointed + * by this address with the supplied target value. + * in order not to perform any comparison, set address + * to all 1s. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 addr; + + /* cq_counters_handle to a kernel mapped cb which contains + * cq counters. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_handle; + }; + + /* Target value for completion comparison */ + __u64 target; + }; + }; + + /* Context ID - Currently not in use */ + __u32 ctx_id; + + /* HL_WAIT_CS_FLAGS_* + * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include + * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK + * + * in order to wait for any CQ interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. + * + * in order to wait for any decoder interrupt, set interrupt value to + * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT. + */ + __u32 flags; + + union { + struct { + /* Multi CS API info- valid entries in multi-CS array */ + __u8 seq_arr_len; + __u8 pad[7]; + }; + + /* Absolute timeout to wait for an interrupt in microseconds. + * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set + */ + __u64 interrupt_timeout_us; + }; + + /* + * cq counter offset inside the counters cb pointed by cq_counters_handle above. + * upon interrupt, driver will compare the value pointed + * by this address (cq_counters_handle + cq_counters_offset) + * with the supplied target value. + * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set + */ + __u64 cq_counters_offset; + + /* + * Timestamp_handle timestamps buffer handle. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_handle; + + /* + * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above. + * upon interrupt, if the cq reached the target value then driver will write + * timestamp to this offset. + * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set + */ + __u64 timestamp_offset; +}; + +#define HL_WAIT_CS_STATUS_COMPLETED 0 +#define HL_WAIT_CS_STATUS_BUSY 1 +#define HL_WAIT_CS_STATUS_TIMEDOUT 2 +#define HL_WAIT_CS_STATUS_ABORTED 3 + +#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 +#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 + +struct hl_wait_cs_out { + /* HL_WAIT_CS_STATUS_* */ + __u32 status; + /* HL_WAIT_CS_STATUS_FLAG* */ + __u32 flags; + /* + * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set + * for wait_cs: timestamp of CS completion + * for wait_multi_cs: timestamp of FIRST CS completion + */ + __s64 timestamp_nsec; + /* multi CS completion bitmap */ + __u32 cs_completion_map; + __u32 pad; +}; + +union hl_wait_cs_args { + struct hl_wait_cs_in in; + struct hl_wait_cs_out out; +}; + +/* Opcode to allocate device memory */ +#define HL_MEM_OP_ALLOC 0 + +/* Opcode to free previously allocated device memory */ +#define HL_MEM_OP_FREE 1 + +/* Opcode to map host and device memory */ +#define HL_MEM_OP_MAP 2 + +/* Opcode to unmap previously mapped host and device memory */ +#define HL_MEM_OP_UNMAP 3 + +/* Opcode to map a hw block */ +#define HL_MEM_OP_MAP_BLOCK 4 + +/* Opcode to create DMA-BUF object for an existing device memory allocation + * and to export an FD of that DMA-BUF back to the caller + */ +#define HL_MEM_OP_EXPORT_DMABUF_FD 5 + +/* Opcode to create timestamps pool for user interrupts registration support + * The memory will be allocated by the kernel driver, A timestamp buffer which the user + * will get handle to it for mmap, and another internal buffer used by the + * driver for registration management + * The memory will be freed when the user closes the file descriptor(ctx close) + */ +#define HL_MEM_OP_TS_ALLOC 6 + +/* Memory flags */ +#define HL_MEM_CONTIGUOUS 0x1 +#define HL_MEM_SHARED 0x2 +#define HL_MEM_USERPTR 0x4 +#define HL_MEM_FORCE_HINT 0x8 +#define HL_MEM_PREFETCH 0x40 + +/** + * structure hl_mem_in - structure that handle input args for memory IOCTL + * @union arg: union of structures to be used based on the input operation + * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions). + * @flags: flags for the memory operation (one of the HL_MEM_* definitions). + * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags. + * @ctx_id: context ID - currently not in use. + * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode. + */ +struct hl_mem_in { + union { + /** + * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) + * @mem_size: memory size to allocate + * @page_size: page size to use on allocation. when the value is 0 the default page + * size will be taken. + */ + struct { + __u64 mem_size; + __u64 page_size; + } alloc; + + /** + * structure for free-ing device memory (used with the HL_MEM_OP_FREE op) + * @handle: handle returned from HL_MEM_OP_ALLOC + */ + struct { + __u64 handle; + } free; + + /** + * structure for mapping device memory (used with the HL_MEM_OP_MAP op) + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @handle: handle returned from HL_MEM_OP_ALLOC. + */ + struct { + __u64 hint_addr; + __u64 handle; + } map_device; + + /** + * structure for mapping host memory (used with the HL_MEM_OP_MAP op) + * @host_virt_addr: address of allocated host memory. + * @hint_addr: requested virtual address of mapped memory. + * the driver will try to map the requested region to this hint + * address, as long as the address is valid and not already mapped. + * the user should check the returned address of the IOCTL to make + * sure he got the hint address. + * passing 0 here means that the driver will choose the address itself. + * @size: size of allocated host memory. + */ + struct { + __u64 host_virt_addr; + __u64 hint_addr; + __u64 mem_size; + } map_host; + + /** + * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op) + * @block_addr:HW block address to map, a handle and size will be returned + * to the user and will be used to mmap the relevant block. + * only addresses from configuration space are allowed. + */ + struct { + __u64 block_addr; + } map_block; + + /** + * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op) + * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP + */ + struct { + __u64 device_virt_addr; + } unmap; + + /** + * structure for exporting DMABUF object (used with + * the HL_MEM_OP_EXPORT_DMABUF_FD op) + * @addr: for Gaudi1, the driver expects a physical address + * inside the device's DRAM. this is because in Gaudi1 + * we don't have MMU that covers the device's DRAM. + * for all other ASICs, the driver expects a device + * virtual address that represents the start address of + * a mapped DRAM memory area inside the device. + * the address must be the same as was received from the + * driver during a previous HL_MEM_OP_MAP operation. + * @mem_size: size of memory to export. + * @offset: for Gaudi1, this value must be 0. For all other ASICs, + * the driver expects an offset inside of the memory area + * describe by addr. the offset represents the start + * address of that the exported dma-buf object describes. + */ + struct { + __u64 addr; + __u64 mem_size; + __u64 offset; + } export_dmabuf_fd; + }; + + __u32 op; + __u32 flags; + __u32 ctx_id; + __u32 num_of_elements; +}; + +struct hl_mem_out { + union { + /* + * Used for HL_MEM_OP_MAP as the virtual address that was + * assigned in the device VA space. + * A value of 0 means the requested operation failed. + */ + __u64 device_virt_addr; + + /* + * Used in HL_MEM_OP_ALLOC + * This is the assigned handle for the allocated memory + */ + __u64 handle; + + struct { + /* + * Used in HL_MEM_OP_MAP_BLOCK. + * This is the assigned handle for the mapped block + */ + __u64 block_handle; + + /* + * Used in HL_MEM_OP_MAP_BLOCK + * This is the size of the mapped block + */ + __u32 block_size; + + __u32 pad; + }; + + /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the + * DMA-BUF object that was created to describe a memory + * allocation on the device's memory space. The FD should be + * passed to the importer driver + */ + __s32 fd; + }; +}; + +union hl_mem_args { + struct hl_mem_in in; + struct hl_mem_out out; +}; + +#define HL_DEBUG_MAX_AUX_VALUES 10 + +struct hl_debug_params_etr { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_etf { + /* Address in memory to allocate buffer */ + __u64 buffer_address; + + /* Size of buffer to allocate */ + __u64 buffer_size; + + /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ + __u32 sink_mode; + __u32 pad; +}; + +struct hl_debug_params_stm { + /* Two bit masks for HW event and Stimulus Port */ + __u64 he_mask; + __u64 sp_mask; + + /* Trace source ID */ + __u32 id; + + /* Frequency for the timestamp register */ + __u32 frequency; +}; + +struct hl_debug_params_bmon { + /* Two address ranges that the user can request to filter */ + __u64 start_addr0; + __u64 addr_mask0; + + __u64 start_addr1; + __u64 addr_mask1; + + /* Capture window configuration */ + __u32 bw_win; + __u32 win_capture; + + /* Trace source ID */ + __u32 id; + + /* Control register */ + __u32 control; + + /* Two more address ranges that the user can request to filter */ + __u64 start_addr2; + __u64 end_addr2; + + __u64 start_addr3; + __u64 end_addr3; +}; + +struct hl_debug_params_spmu { + /* Event types selection */ + __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; + + /* Number of event types selection */ + __u32 event_types_num; + + /* TRC configuration register values */ + __u32 pmtrc_val; + __u32 trc_ctrl_host_val; + __u32 trc_en_host_val; +}; + +/* Opcode for ETR component */ +#define HL_DEBUG_OP_ETR 0 +/* Opcode for ETF component */ +#define HL_DEBUG_OP_ETF 1 +/* Opcode for STM component */ +#define HL_DEBUG_OP_STM 2 +/* Opcode for FUNNEL component */ +#define HL_DEBUG_OP_FUNNEL 3 +/* Opcode for BMON component */ +#define HL_DEBUG_OP_BMON 4 +/* Opcode for SPMU component */ +#define HL_DEBUG_OP_SPMU 5 +/* Opcode for timestamp (deprecated) */ +#define HL_DEBUG_OP_TIMESTAMP 6 +/* Opcode for setting the device into or out of debug mode. The enable + * variable should be 1 for enabling debug mode and 0 for disabling it + */ +#define HL_DEBUG_OP_SET_MODE 7 + +struct hl_debug_args { + /* + * Pointer to user input structure. + * This field is relevant to specific opcodes. + */ + __u64 input_ptr; + /* Pointer to user output structure */ + __u64 output_ptr; + /* Size of user input structure */ + __u32 input_size; + /* Size of user output structure */ + __u32 output_size; + /* HL_DEBUG_OP_* */ + __u32 op; + /* + * Register index in the component, taken from the debug_regs_index enum + * in the various ASIC header files + */ + __u32 reg_idx; + /* Enable/disable */ + __u32 enable; + /* Context ID - Currently not in use */ + __u32 ctx_id; +}; + +/* + * Various information operations such as: + * - H/W IP information + * - Current dram usage + * + * The user calls this IOCTL with an opcode that describes the required + * information. The user should supply a pointer to a user-allocated memory + * chunk, which will be filled by the driver with the requested information. + * + * The user supplies the maximum amount of size to copy into the user's memory, + * in order to prevent data corruption in case of differences between the + * definitions of structures in kernel and userspace, e.g. in case of old + * userspace and new kernel driver + */ +#define HL_IOCTL_INFO \ + _IOWR('H', 0x01, struct hl_info_args) + +/* + * Command Buffer + * - Request a Command Buffer + * - Destroy a Command Buffer + * + * The command buffers are memory blocks that reside in DMA-able address + * space and are physically contiguous so they can be accessed by the device + * directly. They are allocated using the coherent DMA API. + * + * When creating a new CB, the IOCTL returns a handle of it, and the user-space + * process needs to use that handle to mmap the buffer so it can access them. + * + * In some instances, the device must access the command buffer through the + * device's MMU, and thus its memory should be mapped. In these cases, user can + * indicate the driver that such a mapping is required. + * The resulting device virtual address will be used internally by the driver, + * and won't be returned to user. + * + */ +#define HL_IOCTL_CB \ + _IOWR('H', 0x02, union hl_cb_args) + +/* + * Command Submission + * + * To submit work to the device, the user need to call this IOCTL with a set + * of JOBS. That set of JOBS constitutes a CS object. + * Each JOB will be enqueued on a specific queue, according to the user's input. + * There can be more then one JOB per queue. + * + * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase + * and a second set is for "execution" phase. + * The JOBS on the "restore" phase are enqueued only after context-switch + * (or if its the first CS for this context). The user can also order the + * driver to run the "restore" phase explicitly + * + * Goya/Gaudi: + * There are two types of queues - external and internal. External queues + * are DMA queues which transfer data from/to the Host. All other queues are + * internal. The driver will get completion notifications from the device only + * on JOBS which are enqueued in the external queues. + * + * Greco onwards: + * There is a single type of queue for all types of engines, either DMA engines + * for transfers from/to the host or inside the device, or compute engines. + * The driver will get completion notifications from the device for all queues. + * + * For jobs on external queues, the user needs to create command buffers + * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on + * internal queues, the user needs to prepare a "command buffer" with packets + * on either the device SRAM/DRAM or the host, and give the device address of + * that buffer to the CS ioctl. + * For jobs on H/W queues both options of command buffers are valid. + * + * This IOCTL is asynchronous in regard to the actual execution of the CS. This + * means it returns immediately after ALL the JOBS were enqueued on their + * relevant queues. Therefore, the user mustn't assume the CS has been completed + * or has even started to execute. + * + * Upon successful enqueue, the IOCTL returns a sequence number which the user + * can use with the "Wait for CS" IOCTL to check whether the handle's CS + * non-internal JOBS have been completed. Note that if the CS has internal JOBS + * which can execute AFTER the external JOBS have finished, the driver might + * report that the CS has finished executing BEFORE the internal JOBS have + * actually finished executing. + * + * Even though the sequence number increments per CS, the user can NOT + * automatically assume that if CS with sequence number N finished, then CS + * with sequence number N-1 also finished. The user can make this assumption if + * and only if CS N and CS N-1 are exactly the same (same CBs for the same + * queues). + */ +#define HL_IOCTL_CS \ + _IOWR('H', 0x03, union hl_cs_args) + +/* + * Wait for Command Submission + * + * The user can call this IOCTL with a handle it received from the CS IOCTL + * to wait until the handle's CS has finished executing. The user will wait + * inside the kernel until the CS has finished or until the user-requested + * timeout has expired. + * + * If the timeout value is 0, the driver won't sleep at all. It will check + * the status of the CS and return immediately + * + * The return value of the IOCTL is a standard Linux error code. The possible + * values are: + * + * EINTR - Kernel waiting has been interrupted, e.g. due to OS signal + * that the user process received + * ETIMEDOUT - The CS has caused a timeout on the device + * EIO - The CS was aborted (usually because the device was reset) + * ENODEV - The device wants to do hard-reset (so user need to close FD) + * + * The driver also returns a custom define in case the IOCTL call returned 0. + * The define can be one of the following: + * + * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) + * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) + * HL_WAIT_CS_STATUS_TIMEDOUT - The CS has caused a timeout on the device + * (ETIMEDOUT) + * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the + * device was reset (EIO) + */ + +#define HL_IOCTL_WAIT_CS \ + _IOWR('H', 0x04, union hl_wait_cs_args) + +/* + * Memory + * - Map host memory to device MMU + * - Unmap host memory from device MMU + * + * This IOCTL allows the user to map host memory to the device MMU + * + * For host memory, the IOCTL doesn't allocate memory. The user is supposed + * to allocate the memory in user-space (malloc/new). The driver pins the + * physical pages (up to the allowed limit by the OS), assigns a virtual + * address in the device VA space and initializes the device MMU. + * + * There is an option for the user to specify the requested virtual address. + * + */ +#define HL_IOCTL_MEMORY \ + _IOWR('H', 0x05, union hl_mem_args) + +/* + * Debug + * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces + * + * This IOCTL allows the user to get debug traces from the chip. + * + * Before the user can send configuration requests of the various + * debug/profile engines, it needs to set the device into debug mode. + * This is because the debug/profile infrastructure is shared component in the + * device and we can't allow multiple users to access it at the same time. + * + * Once a user set the device into debug mode, the driver won't allow other + * users to "work" with the device, i.e. open a FD. If there are multiple users + * opened on the device, the driver won't allow any user to debug the device. + * + * For each configuration request, the user needs to provide the register index + * and essential data such as buffer address and size. + * + * Once the user has finished using the debug/profile engines, he should + * set the device into non-debug mode, i.e. disable debug mode. + * + * The driver can decide to "kick out" the user if he abuses this interface. + * + */ +#define HL_IOCTL_DEBUG \ + _IOWR('H', 0x06, struct hl_debug_args) + +#define HL_COMMAND_START 0x01 +#define HL_COMMAND_END 0x07 + +#endif /* HABANALABS_H_ */ diff --git a/include/uapi/misc/habanalabs.h b/include/uapi/misc/habanalabs.h deleted file mode 100644 index 90e628779264..000000000000 --- a/include/uapi/misc/habanalabs.h +++ /dev/null @@ -1,2225 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note - * - * Copyright 2016-2022 HabanaLabs, Ltd. - * All Rights Reserved. - * - */ - -#ifndef HABANALABS_H_ -#define HABANALABS_H_ - -#include -#include - -/* - * Defines that are asic-specific but constitutes as ABI between kernel driver - * and userspace - */ -#define GOYA_KMD_SRAM_RESERVED_SIZE_FROM_START 0x8000 /* 32KB */ -#define GAUDI_DRIVER_SRAM_RESERVED_SIZE_FROM_START 0x80 /* 128 bytes */ - -/* - * 128 SOBs reserved for collective wait - * 16 SOBs reserved for sync stream - */ -#define GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT 144 - -/* - * 64 monitors reserved for collective wait - * 8 monitors reserved for sync stream - */ -#define GAUDI_FIRST_AVAILABLE_W_S_MONITOR 72 - -/* Max number of elements in timestamps registration buffers */ -#define TS_MAX_ELEMENTS_NUM (1 << 20) /* 1MB */ - -/* - * Goya queue Numbering - * - * The external queues (PCI DMA channels) MUST be before the internal queues - * and each group (PCI DMA channels and internal) must be contiguous inside - * itself but there can be a gap between the two groups (although not - * recommended) - */ - -enum goya_queue_id { - GOYA_QUEUE_ID_DMA_0 = 0, - GOYA_QUEUE_ID_DMA_1 = 1, - GOYA_QUEUE_ID_DMA_2 = 2, - GOYA_QUEUE_ID_DMA_3 = 3, - GOYA_QUEUE_ID_DMA_4 = 4, - GOYA_QUEUE_ID_CPU_PQ = 5, - GOYA_QUEUE_ID_MME = 6, /* Internal queues start here */ - GOYA_QUEUE_ID_TPC0 = 7, - GOYA_QUEUE_ID_TPC1 = 8, - GOYA_QUEUE_ID_TPC2 = 9, - GOYA_QUEUE_ID_TPC3 = 10, - GOYA_QUEUE_ID_TPC4 = 11, - GOYA_QUEUE_ID_TPC5 = 12, - GOYA_QUEUE_ID_TPC6 = 13, - GOYA_QUEUE_ID_TPC7 = 14, - GOYA_QUEUE_ID_SIZE -}; - -/* - * Gaudi queue Numbering - * External queues (PCI DMA channels) are DMA_0_*, DMA_1_* and DMA_5_*. - * Except one CPU queue, all the rest are internal queues. - */ - -enum gaudi_queue_id { - GAUDI_QUEUE_ID_DMA_0_0 = 0, /* external */ - GAUDI_QUEUE_ID_DMA_0_1 = 1, /* external */ - GAUDI_QUEUE_ID_DMA_0_2 = 2, /* external */ - GAUDI_QUEUE_ID_DMA_0_3 = 3, /* external */ - GAUDI_QUEUE_ID_DMA_1_0 = 4, /* external */ - GAUDI_QUEUE_ID_DMA_1_1 = 5, /* external */ - GAUDI_QUEUE_ID_DMA_1_2 = 6, /* external */ - GAUDI_QUEUE_ID_DMA_1_3 = 7, /* external */ - GAUDI_QUEUE_ID_CPU_PQ = 8, /* CPU */ - GAUDI_QUEUE_ID_DMA_2_0 = 9, /* internal */ - GAUDI_QUEUE_ID_DMA_2_1 = 10, /* internal */ - GAUDI_QUEUE_ID_DMA_2_2 = 11, /* internal */ - GAUDI_QUEUE_ID_DMA_2_3 = 12, /* internal */ - GAUDI_QUEUE_ID_DMA_3_0 = 13, /* internal */ - GAUDI_QUEUE_ID_DMA_3_1 = 14, /* internal */ - GAUDI_QUEUE_ID_DMA_3_2 = 15, /* internal */ - GAUDI_QUEUE_ID_DMA_3_3 = 16, /* internal */ - GAUDI_QUEUE_ID_DMA_4_0 = 17, /* internal */ - GAUDI_QUEUE_ID_DMA_4_1 = 18, /* internal */ - GAUDI_QUEUE_ID_DMA_4_2 = 19, /* internal */ - GAUDI_QUEUE_ID_DMA_4_3 = 20, /* internal */ - GAUDI_QUEUE_ID_DMA_5_0 = 21, /* internal */ - GAUDI_QUEUE_ID_DMA_5_1 = 22, /* internal */ - GAUDI_QUEUE_ID_DMA_5_2 = 23, /* internal */ - GAUDI_QUEUE_ID_DMA_5_3 = 24, /* internal */ - GAUDI_QUEUE_ID_DMA_6_0 = 25, /* internal */ - GAUDI_QUEUE_ID_DMA_6_1 = 26, /* internal */ - GAUDI_QUEUE_ID_DMA_6_2 = 27, /* internal */ - GAUDI_QUEUE_ID_DMA_6_3 = 28, /* internal */ - GAUDI_QUEUE_ID_DMA_7_0 = 29, /* internal */ - GAUDI_QUEUE_ID_DMA_7_1 = 30, /* internal */ - GAUDI_QUEUE_ID_DMA_7_2 = 31, /* internal */ - GAUDI_QUEUE_ID_DMA_7_3 = 32, /* internal */ - GAUDI_QUEUE_ID_MME_0_0 = 33, /* internal */ - GAUDI_QUEUE_ID_MME_0_1 = 34, /* internal */ - GAUDI_QUEUE_ID_MME_0_2 = 35, /* internal */ - GAUDI_QUEUE_ID_MME_0_3 = 36, /* internal */ - GAUDI_QUEUE_ID_MME_1_0 = 37, /* internal */ - GAUDI_QUEUE_ID_MME_1_1 = 38, /* internal */ - GAUDI_QUEUE_ID_MME_1_2 = 39, /* internal */ - GAUDI_QUEUE_ID_MME_1_3 = 40, /* internal */ - GAUDI_QUEUE_ID_TPC_0_0 = 41, /* internal */ - GAUDI_QUEUE_ID_TPC_0_1 = 42, /* internal */ - GAUDI_QUEUE_ID_TPC_0_2 = 43, /* internal */ - GAUDI_QUEUE_ID_TPC_0_3 = 44, /* internal */ - GAUDI_QUEUE_ID_TPC_1_0 = 45, /* internal */ - GAUDI_QUEUE_ID_TPC_1_1 = 46, /* internal */ - GAUDI_QUEUE_ID_TPC_1_2 = 47, /* internal */ - GAUDI_QUEUE_ID_TPC_1_3 = 48, /* internal */ - GAUDI_QUEUE_ID_TPC_2_0 = 49, /* internal */ - GAUDI_QUEUE_ID_TPC_2_1 = 50, /* internal */ - GAUDI_QUEUE_ID_TPC_2_2 = 51, /* internal */ - GAUDI_QUEUE_ID_TPC_2_3 = 52, /* internal */ - GAUDI_QUEUE_ID_TPC_3_0 = 53, /* internal */ - GAUDI_QUEUE_ID_TPC_3_1 = 54, /* internal */ - GAUDI_QUEUE_ID_TPC_3_2 = 55, /* internal */ - GAUDI_QUEUE_ID_TPC_3_3 = 56, /* internal */ - GAUDI_QUEUE_ID_TPC_4_0 = 57, /* internal */ - GAUDI_QUEUE_ID_TPC_4_1 = 58, /* internal */ - GAUDI_QUEUE_ID_TPC_4_2 = 59, /* internal */ - GAUDI_QUEUE_ID_TPC_4_3 = 60, /* internal */ - GAUDI_QUEUE_ID_TPC_5_0 = 61, /* internal */ - GAUDI_QUEUE_ID_TPC_5_1 = 62, /* internal */ - GAUDI_QUEUE_ID_TPC_5_2 = 63, /* internal */ - GAUDI_QUEUE_ID_TPC_5_3 = 64, /* internal */ - GAUDI_QUEUE_ID_TPC_6_0 = 65, /* internal */ - GAUDI_QUEUE_ID_TPC_6_1 = 66, /* internal */ - GAUDI_QUEUE_ID_TPC_6_2 = 67, /* internal */ - GAUDI_QUEUE_ID_TPC_6_3 = 68, /* internal */ - GAUDI_QUEUE_ID_TPC_7_0 = 69, /* internal */ - GAUDI_QUEUE_ID_TPC_7_1 = 70, /* internal */ - GAUDI_QUEUE_ID_TPC_7_2 = 71, /* internal */ - GAUDI_QUEUE_ID_TPC_7_3 = 72, /* internal */ - GAUDI_QUEUE_ID_NIC_0_0 = 73, /* internal */ - GAUDI_QUEUE_ID_NIC_0_1 = 74, /* internal */ - GAUDI_QUEUE_ID_NIC_0_2 = 75, /* internal */ - GAUDI_QUEUE_ID_NIC_0_3 = 76, /* internal */ - GAUDI_QUEUE_ID_NIC_1_0 = 77, /* internal */ - GAUDI_QUEUE_ID_NIC_1_1 = 78, /* internal */ - GAUDI_QUEUE_ID_NIC_1_2 = 79, /* internal */ - GAUDI_QUEUE_ID_NIC_1_3 = 80, /* internal */ - GAUDI_QUEUE_ID_NIC_2_0 = 81, /* internal */ - GAUDI_QUEUE_ID_NIC_2_1 = 82, /* internal */ - GAUDI_QUEUE_ID_NIC_2_2 = 83, /* internal */ - GAUDI_QUEUE_ID_NIC_2_3 = 84, /* internal */ - GAUDI_QUEUE_ID_NIC_3_0 = 85, /* internal */ - GAUDI_QUEUE_ID_NIC_3_1 = 86, /* internal */ - GAUDI_QUEUE_ID_NIC_3_2 = 87, /* internal */ - GAUDI_QUEUE_ID_NIC_3_3 = 88, /* internal */ - GAUDI_QUEUE_ID_NIC_4_0 = 89, /* internal */ - GAUDI_QUEUE_ID_NIC_4_1 = 90, /* internal */ - GAUDI_QUEUE_ID_NIC_4_2 = 91, /* internal */ - GAUDI_QUEUE_ID_NIC_4_3 = 92, /* internal */ - GAUDI_QUEUE_ID_NIC_5_0 = 93, /* internal */ - GAUDI_QUEUE_ID_NIC_5_1 = 94, /* internal */ - GAUDI_QUEUE_ID_NIC_5_2 = 95, /* internal */ - GAUDI_QUEUE_ID_NIC_5_3 = 96, /* internal */ - GAUDI_QUEUE_ID_NIC_6_0 = 97, /* internal */ - GAUDI_QUEUE_ID_NIC_6_1 = 98, /* internal */ - GAUDI_QUEUE_ID_NIC_6_2 = 99, /* internal */ - GAUDI_QUEUE_ID_NIC_6_3 = 100, /* internal */ - GAUDI_QUEUE_ID_NIC_7_0 = 101, /* internal */ - GAUDI_QUEUE_ID_NIC_7_1 = 102, /* internal */ - GAUDI_QUEUE_ID_NIC_7_2 = 103, /* internal */ - GAUDI_QUEUE_ID_NIC_7_3 = 104, /* internal */ - GAUDI_QUEUE_ID_NIC_8_0 = 105, /* internal */ - GAUDI_QUEUE_ID_NIC_8_1 = 106, /* internal */ - GAUDI_QUEUE_ID_NIC_8_2 = 107, /* internal */ - GAUDI_QUEUE_ID_NIC_8_3 = 108, /* internal */ - GAUDI_QUEUE_ID_NIC_9_0 = 109, /* internal */ - GAUDI_QUEUE_ID_NIC_9_1 = 110, /* internal */ - GAUDI_QUEUE_ID_NIC_9_2 = 111, /* internal */ - GAUDI_QUEUE_ID_NIC_9_3 = 112, /* internal */ - GAUDI_QUEUE_ID_SIZE -}; - -/* - * In GAUDI2 we have two modes of operation in regard to queues: - * 1. Legacy mode, where each QMAN exposes 4 streams to the user - * 2. F/W mode, where we use F/W to schedule the JOBS to the different queues. - * - * When in legacy mode, the user sends the queue id per JOB according to - * enum gaudi2_queue_id below. - * - * When in F/W mode, the user sends a stream id per Command Submission. The - * stream id is a running number from 0 up to (N-1), where N is the number - * of streams the F/W exposes and is passed to the user in - * struct hl_info_hw_ip_info - */ - -enum gaudi2_queue_id { - GAUDI2_QUEUE_ID_PDMA_0_0 = 0, - GAUDI2_QUEUE_ID_PDMA_0_1 = 1, - GAUDI2_QUEUE_ID_PDMA_0_2 = 2, - GAUDI2_QUEUE_ID_PDMA_0_3 = 3, - GAUDI2_QUEUE_ID_PDMA_1_0 = 4, - GAUDI2_QUEUE_ID_PDMA_1_1 = 5, - GAUDI2_QUEUE_ID_PDMA_1_2 = 6, - GAUDI2_QUEUE_ID_PDMA_1_3 = 7, - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0 = 8, - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1 = 9, - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2 = 10, - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3 = 11, - GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0 = 12, - GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1 = 13, - GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2 = 14, - GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3 = 15, - GAUDI2_QUEUE_ID_DCORE0_MME_0_0 = 16, - GAUDI2_QUEUE_ID_DCORE0_MME_0_1 = 17, - GAUDI2_QUEUE_ID_DCORE0_MME_0_2 = 18, - GAUDI2_QUEUE_ID_DCORE0_MME_0_3 = 19, - GAUDI2_QUEUE_ID_DCORE0_TPC_0_0 = 20, - GAUDI2_QUEUE_ID_DCORE0_TPC_0_1 = 21, - GAUDI2_QUEUE_ID_DCORE0_TPC_0_2 = 22, - GAUDI2_QUEUE_ID_DCORE0_TPC_0_3 = 23, - GAUDI2_QUEUE_ID_DCORE0_TPC_1_0 = 24, - GAUDI2_QUEUE_ID_DCORE0_TPC_1_1 = 25, - GAUDI2_QUEUE_ID_DCORE0_TPC_1_2 = 26, - GAUDI2_QUEUE_ID_DCORE0_TPC_1_3 = 27, - GAUDI2_QUEUE_ID_DCORE0_TPC_2_0 = 28, - GAUDI2_QUEUE_ID_DCORE0_TPC_2_1 = 29, - GAUDI2_QUEUE_ID_DCORE0_TPC_2_2 = 30, - GAUDI2_QUEUE_ID_DCORE0_TPC_2_3 = 31, - GAUDI2_QUEUE_ID_DCORE0_TPC_3_0 = 32, - GAUDI2_QUEUE_ID_DCORE0_TPC_3_1 = 33, - GAUDI2_QUEUE_ID_DCORE0_TPC_3_2 = 34, - GAUDI2_QUEUE_ID_DCORE0_TPC_3_3 = 35, - GAUDI2_QUEUE_ID_DCORE0_TPC_4_0 = 36, - GAUDI2_QUEUE_ID_DCORE0_TPC_4_1 = 37, - GAUDI2_QUEUE_ID_DCORE0_TPC_4_2 = 38, - GAUDI2_QUEUE_ID_DCORE0_TPC_4_3 = 39, - GAUDI2_QUEUE_ID_DCORE0_TPC_5_0 = 40, - GAUDI2_QUEUE_ID_DCORE0_TPC_5_1 = 41, - GAUDI2_QUEUE_ID_DCORE0_TPC_5_2 = 42, - GAUDI2_QUEUE_ID_DCORE0_TPC_5_3 = 43, - GAUDI2_QUEUE_ID_DCORE0_TPC_6_0 = 44, - GAUDI2_QUEUE_ID_DCORE0_TPC_6_1 = 45, - GAUDI2_QUEUE_ID_DCORE0_TPC_6_2 = 46, - GAUDI2_QUEUE_ID_DCORE0_TPC_6_3 = 47, - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0 = 48, - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1 = 49, - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2 = 50, - GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3 = 51, - GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0 = 52, - GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1 = 53, - GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2 = 54, - GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3 = 55, - GAUDI2_QUEUE_ID_DCORE1_MME_0_0 = 56, - GAUDI2_QUEUE_ID_DCORE1_MME_0_1 = 57, - GAUDI2_QUEUE_ID_DCORE1_MME_0_2 = 58, - GAUDI2_QUEUE_ID_DCORE1_MME_0_3 = 59, - GAUDI2_QUEUE_ID_DCORE1_TPC_0_0 = 60, - GAUDI2_QUEUE_ID_DCORE1_TPC_0_1 = 61, - GAUDI2_QUEUE_ID_DCORE1_TPC_0_2 = 62, - GAUDI2_QUEUE_ID_DCORE1_TPC_0_3 = 63, - GAUDI2_QUEUE_ID_DCORE1_TPC_1_0 = 64, - GAUDI2_QUEUE_ID_DCORE1_TPC_1_1 = 65, - GAUDI2_QUEUE_ID_DCORE1_TPC_1_2 = 66, - GAUDI2_QUEUE_ID_DCORE1_TPC_1_3 = 67, - GAUDI2_QUEUE_ID_DCORE1_TPC_2_0 = 68, - GAUDI2_QUEUE_ID_DCORE1_TPC_2_1 = 69, - GAUDI2_QUEUE_ID_DCORE1_TPC_2_2 = 70, - GAUDI2_QUEUE_ID_DCORE1_TPC_2_3 = 71, - GAUDI2_QUEUE_ID_DCORE1_TPC_3_0 = 72, - GAUDI2_QUEUE_ID_DCORE1_TPC_3_1 = 73, - GAUDI2_QUEUE_ID_DCORE1_TPC_3_2 = 74, - GAUDI2_QUEUE_ID_DCORE1_TPC_3_3 = 75, - GAUDI2_QUEUE_ID_DCORE1_TPC_4_0 = 76, - GAUDI2_QUEUE_ID_DCORE1_TPC_4_1 = 77, - GAUDI2_QUEUE_ID_DCORE1_TPC_4_2 = 78, - GAUDI2_QUEUE_ID_DCORE1_TPC_4_3 = 79, - GAUDI2_QUEUE_ID_DCORE1_TPC_5_0 = 80, - GAUDI2_QUEUE_ID_DCORE1_TPC_5_1 = 81, - GAUDI2_QUEUE_ID_DCORE1_TPC_5_2 = 82, - GAUDI2_QUEUE_ID_DCORE1_TPC_5_3 = 83, - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0 = 84, - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1 = 85, - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2 = 86, - GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3 = 87, - GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0 = 88, - GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1 = 89, - GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2 = 90, - GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3 = 91, - GAUDI2_QUEUE_ID_DCORE2_MME_0_0 = 92, - GAUDI2_QUEUE_ID_DCORE2_MME_0_1 = 93, - GAUDI2_QUEUE_ID_DCORE2_MME_0_2 = 94, - GAUDI2_QUEUE_ID_DCORE2_MME_0_3 = 95, - GAUDI2_QUEUE_ID_DCORE2_TPC_0_0 = 96, - GAUDI2_QUEUE_ID_DCORE2_TPC_0_1 = 97, - GAUDI2_QUEUE_ID_DCORE2_TPC_0_2 = 98, - GAUDI2_QUEUE_ID_DCORE2_TPC_0_3 = 99, - GAUDI2_QUEUE_ID_DCORE2_TPC_1_0 = 100, - GAUDI2_QUEUE_ID_DCORE2_TPC_1_1 = 101, - GAUDI2_QUEUE_ID_DCORE2_TPC_1_2 = 102, - GAUDI2_QUEUE_ID_DCORE2_TPC_1_3 = 103, - GAUDI2_QUEUE_ID_DCORE2_TPC_2_0 = 104, - GAUDI2_QUEUE_ID_DCORE2_TPC_2_1 = 105, - GAUDI2_QUEUE_ID_DCORE2_TPC_2_2 = 106, - GAUDI2_QUEUE_ID_DCORE2_TPC_2_3 = 107, - GAUDI2_QUEUE_ID_DCORE2_TPC_3_0 = 108, - GAUDI2_QUEUE_ID_DCORE2_TPC_3_1 = 109, - GAUDI2_QUEUE_ID_DCORE2_TPC_3_2 = 110, - GAUDI2_QUEUE_ID_DCORE2_TPC_3_3 = 111, - GAUDI2_QUEUE_ID_DCORE2_TPC_4_0 = 112, - GAUDI2_QUEUE_ID_DCORE2_TPC_4_1 = 113, - GAUDI2_QUEUE_ID_DCORE2_TPC_4_2 = 114, - GAUDI2_QUEUE_ID_DCORE2_TPC_4_3 = 115, - GAUDI2_QUEUE_ID_DCORE2_TPC_5_0 = 116, - GAUDI2_QUEUE_ID_DCORE2_TPC_5_1 = 117, - GAUDI2_QUEUE_ID_DCORE2_TPC_5_2 = 118, - GAUDI2_QUEUE_ID_DCORE2_TPC_5_3 = 119, - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0 = 120, - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1 = 121, - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2 = 122, - GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3 = 123, - GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0 = 124, - GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1 = 125, - GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2 = 126, - GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3 = 127, - GAUDI2_QUEUE_ID_DCORE3_MME_0_0 = 128, - GAUDI2_QUEUE_ID_DCORE3_MME_0_1 = 129, - GAUDI2_QUEUE_ID_DCORE3_MME_0_2 = 130, - GAUDI2_QUEUE_ID_DCORE3_MME_0_3 = 131, - GAUDI2_QUEUE_ID_DCORE3_TPC_0_0 = 132, - GAUDI2_QUEUE_ID_DCORE3_TPC_0_1 = 133, - GAUDI2_QUEUE_ID_DCORE3_TPC_0_2 = 134, - GAUDI2_QUEUE_ID_DCORE3_TPC_0_3 = 135, - GAUDI2_QUEUE_ID_DCORE3_TPC_1_0 = 136, - GAUDI2_QUEUE_ID_DCORE3_TPC_1_1 = 137, - GAUDI2_QUEUE_ID_DCORE3_TPC_1_2 = 138, - GAUDI2_QUEUE_ID_DCORE3_TPC_1_3 = 139, - GAUDI2_QUEUE_ID_DCORE3_TPC_2_0 = 140, - GAUDI2_QUEUE_ID_DCORE3_TPC_2_1 = 141, - GAUDI2_QUEUE_ID_DCORE3_TPC_2_2 = 142, - GAUDI2_QUEUE_ID_DCORE3_TPC_2_3 = 143, - GAUDI2_QUEUE_ID_DCORE3_TPC_3_0 = 144, - GAUDI2_QUEUE_ID_DCORE3_TPC_3_1 = 145, - GAUDI2_QUEUE_ID_DCORE3_TPC_3_2 = 146, - GAUDI2_QUEUE_ID_DCORE3_TPC_3_3 = 147, - GAUDI2_QUEUE_ID_DCORE3_TPC_4_0 = 148, - GAUDI2_QUEUE_ID_DCORE3_TPC_4_1 = 149, - GAUDI2_QUEUE_ID_DCORE3_TPC_4_2 = 150, - GAUDI2_QUEUE_ID_DCORE3_TPC_4_3 = 151, - GAUDI2_QUEUE_ID_DCORE3_TPC_5_0 = 152, - GAUDI2_QUEUE_ID_DCORE3_TPC_5_1 = 153, - GAUDI2_QUEUE_ID_DCORE3_TPC_5_2 = 154, - GAUDI2_QUEUE_ID_DCORE3_TPC_5_3 = 155, - GAUDI2_QUEUE_ID_NIC_0_0 = 156, - GAUDI2_QUEUE_ID_NIC_0_1 = 157, - GAUDI2_QUEUE_ID_NIC_0_2 = 158, - GAUDI2_QUEUE_ID_NIC_0_3 = 159, - GAUDI2_QUEUE_ID_NIC_1_0 = 160, - GAUDI2_QUEUE_ID_NIC_1_1 = 161, - GAUDI2_QUEUE_ID_NIC_1_2 = 162, - GAUDI2_QUEUE_ID_NIC_1_3 = 163, - GAUDI2_QUEUE_ID_NIC_2_0 = 164, - GAUDI2_QUEUE_ID_NIC_2_1 = 165, - GAUDI2_QUEUE_ID_NIC_2_2 = 166, - GAUDI2_QUEUE_ID_NIC_2_3 = 167, - GAUDI2_QUEUE_ID_NIC_3_0 = 168, - GAUDI2_QUEUE_ID_NIC_3_1 = 169, - GAUDI2_QUEUE_ID_NIC_3_2 = 170, - GAUDI2_QUEUE_ID_NIC_3_3 = 171, - GAUDI2_QUEUE_ID_NIC_4_0 = 172, - GAUDI2_QUEUE_ID_NIC_4_1 = 173, - GAUDI2_QUEUE_ID_NIC_4_2 = 174, - GAUDI2_QUEUE_ID_NIC_4_3 = 175, - GAUDI2_QUEUE_ID_NIC_5_0 = 176, - GAUDI2_QUEUE_ID_NIC_5_1 = 177, - GAUDI2_QUEUE_ID_NIC_5_2 = 178, - GAUDI2_QUEUE_ID_NIC_5_3 = 179, - GAUDI2_QUEUE_ID_NIC_6_0 = 180, - GAUDI2_QUEUE_ID_NIC_6_1 = 181, - GAUDI2_QUEUE_ID_NIC_6_2 = 182, - GAUDI2_QUEUE_ID_NIC_6_3 = 183, - GAUDI2_QUEUE_ID_NIC_7_0 = 184, - GAUDI2_QUEUE_ID_NIC_7_1 = 185, - GAUDI2_QUEUE_ID_NIC_7_2 = 186, - GAUDI2_QUEUE_ID_NIC_7_3 = 187, - GAUDI2_QUEUE_ID_NIC_8_0 = 188, - GAUDI2_QUEUE_ID_NIC_8_1 = 189, - GAUDI2_QUEUE_ID_NIC_8_2 = 190, - GAUDI2_QUEUE_ID_NIC_8_3 = 191, - GAUDI2_QUEUE_ID_NIC_9_0 = 192, - GAUDI2_QUEUE_ID_NIC_9_1 = 193, - GAUDI2_QUEUE_ID_NIC_9_2 = 194, - GAUDI2_QUEUE_ID_NIC_9_3 = 195, - GAUDI2_QUEUE_ID_NIC_10_0 = 196, - GAUDI2_QUEUE_ID_NIC_10_1 = 197, - GAUDI2_QUEUE_ID_NIC_10_2 = 198, - GAUDI2_QUEUE_ID_NIC_10_3 = 199, - GAUDI2_QUEUE_ID_NIC_11_0 = 200, - GAUDI2_QUEUE_ID_NIC_11_1 = 201, - GAUDI2_QUEUE_ID_NIC_11_2 = 202, - GAUDI2_QUEUE_ID_NIC_11_3 = 203, - GAUDI2_QUEUE_ID_NIC_12_0 = 204, - GAUDI2_QUEUE_ID_NIC_12_1 = 205, - GAUDI2_QUEUE_ID_NIC_12_2 = 206, - GAUDI2_QUEUE_ID_NIC_12_3 = 207, - GAUDI2_QUEUE_ID_NIC_13_0 = 208, - GAUDI2_QUEUE_ID_NIC_13_1 = 209, - GAUDI2_QUEUE_ID_NIC_13_2 = 210, - GAUDI2_QUEUE_ID_NIC_13_3 = 211, - GAUDI2_QUEUE_ID_NIC_14_0 = 212, - GAUDI2_QUEUE_ID_NIC_14_1 = 213, - GAUDI2_QUEUE_ID_NIC_14_2 = 214, - GAUDI2_QUEUE_ID_NIC_14_3 = 215, - GAUDI2_QUEUE_ID_NIC_15_0 = 216, - GAUDI2_QUEUE_ID_NIC_15_1 = 217, - GAUDI2_QUEUE_ID_NIC_15_2 = 218, - GAUDI2_QUEUE_ID_NIC_15_3 = 219, - GAUDI2_QUEUE_ID_NIC_16_0 = 220, - GAUDI2_QUEUE_ID_NIC_16_1 = 221, - GAUDI2_QUEUE_ID_NIC_16_2 = 222, - GAUDI2_QUEUE_ID_NIC_16_3 = 223, - GAUDI2_QUEUE_ID_NIC_17_0 = 224, - GAUDI2_QUEUE_ID_NIC_17_1 = 225, - GAUDI2_QUEUE_ID_NIC_17_2 = 226, - GAUDI2_QUEUE_ID_NIC_17_3 = 227, - GAUDI2_QUEUE_ID_NIC_18_0 = 228, - GAUDI2_QUEUE_ID_NIC_18_1 = 229, - GAUDI2_QUEUE_ID_NIC_18_2 = 230, - GAUDI2_QUEUE_ID_NIC_18_3 = 231, - GAUDI2_QUEUE_ID_NIC_19_0 = 232, - GAUDI2_QUEUE_ID_NIC_19_1 = 233, - GAUDI2_QUEUE_ID_NIC_19_2 = 234, - GAUDI2_QUEUE_ID_NIC_19_3 = 235, - GAUDI2_QUEUE_ID_NIC_20_0 = 236, - GAUDI2_QUEUE_ID_NIC_20_1 = 237, - GAUDI2_QUEUE_ID_NIC_20_2 = 238, - GAUDI2_QUEUE_ID_NIC_20_3 = 239, - GAUDI2_QUEUE_ID_NIC_21_0 = 240, - GAUDI2_QUEUE_ID_NIC_21_1 = 241, - GAUDI2_QUEUE_ID_NIC_21_2 = 242, - GAUDI2_QUEUE_ID_NIC_21_3 = 243, - GAUDI2_QUEUE_ID_NIC_22_0 = 244, - GAUDI2_QUEUE_ID_NIC_22_1 = 245, - GAUDI2_QUEUE_ID_NIC_22_2 = 246, - GAUDI2_QUEUE_ID_NIC_22_3 = 247, - GAUDI2_QUEUE_ID_NIC_23_0 = 248, - GAUDI2_QUEUE_ID_NIC_23_1 = 249, - GAUDI2_QUEUE_ID_NIC_23_2 = 250, - GAUDI2_QUEUE_ID_NIC_23_3 = 251, - GAUDI2_QUEUE_ID_ROT_0_0 = 252, - GAUDI2_QUEUE_ID_ROT_0_1 = 253, - GAUDI2_QUEUE_ID_ROT_0_2 = 254, - GAUDI2_QUEUE_ID_ROT_0_3 = 255, - GAUDI2_QUEUE_ID_ROT_1_0 = 256, - GAUDI2_QUEUE_ID_ROT_1_1 = 257, - GAUDI2_QUEUE_ID_ROT_1_2 = 258, - GAUDI2_QUEUE_ID_ROT_1_3 = 259, - GAUDI2_QUEUE_ID_CPU_PQ = 260, - GAUDI2_QUEUE_ID_SIZE -}; - -/* - * Engine Numbering - * - * Used in the "busy_engines_mask" field in `struct hl_info_hw_idle' - */ - -enum goya_engine_id { - GOYA_ENGINE_ID_DMA_0 = 0, - GOYA_ENGINE_ID_DMA_1, - GOYA_ENGINE_ID_DMA_2, - GOYA_ENGINE_ID_DMA_3, - GOYA_ENGINE_ID_DMA_4, - GOYA_ENGINE_ID_MME_0, - GOYA_ENGINE_ID_TPC_0, - GOYA_ENGINE_ID_TPC_1, - GOYA_ENGINE_ID_TPC_2, - GOYA_ENGINE_ID_TPC_3, - GOYA_ENGINE_ID_TPC_4, - GOYA_ENGINE_ID_TPC_5, - GOYA_ENGINE_ID_TPC_6, - GOYA_ENGINE_ID_TPC_7, - GOYA_ENGINE_ID_SIZE -}; - -enum gaudi_engine_id { - GAUDI_ENGINE_ID_DMA_0 = 0, - GAUDI_ENGINE_ID_DMA_1, - GAUDI_ENGINE_ID_DMA_2, - GAUDI_ENGINE_ID_DMA_3, - GAUDI_ENGINE_ID_DMA_4, - GAUDI_ENGINE_ID_DMA_5, - GAUDI_ENGINE_ID_DMA_6, - GAUDI_ENGINE_ID_DMA_7, - GAUDI_ENGINE_ID_MME_0, - GAUDI_ENGINE_ID_MME_1, - GAUDI_ENGINE_ID_MME_2, - GAUDI_ENGINE_ID_MME_3, - GAUDI_ENGINE_ID_TPC_0, - GAUDI_ENGINE_ID_TPC_1, - GAUDI_ENGINE_ID_TPC_2, - GAUDI_ENGINE_ID_TPC_3, - GAUDI_ENGINE_ID_TPC_4, - GAUDI_ENGINE_ID_TPC_5, - GAUDI_ENGINE_ID_TPC_6, - GAUDI_ENGINE_ID_TPC_7, - GAUDI_ENGINE_ID_NIC_0, - GAUDI_ENGINE_ID_NIC_1, - GAUDI_ENGINE_ID_NIC_2, - GAUDI_ENGINE_ID_NIC_3, - GAUDI_ENGINE_ID_NIC_4, - GAUDI_ENGINE_ID_NIC_5, - GAUDI_ENGINE_ID_NIC_6, - GAUDI_ENGINE_ID_NIC_7, - GAUDI_ENGINE_ID_NIC_8, - GAUDI_ENGINE_ID_NIC_9, - GAUDI_ENGINE_ID_SIZE -}; - -enum gaudi2_engine_id { - GAUDI2_DCORE0_ENGINE_ID_EDMA_0 = 0, - GAUDI2_DCORE0_ENGINE_ID_EDMA_1, - GAUDI2_DCORE0_ENGINE_ID_MME, - GAUDI2_DCORE0_ENGINE_ID_TPC_0, - GAUDI2_DCORE0_ENGINE_ID_TPC_1, - GAUDI2_DCORE0_ENGINE_ID_TPC_2, - GAUDI2_DCORE0_ENGINE_ID_TPC_3, - GAUDI2_DCORE0_ENGINE_ID_TPC_4, - GAUDI2_DCORE0_ENGINE_ID_TPC_5, - GAUDI2_DCORE0_ENGINE_ID_DEC_0, - GAUDI2_DCORE0_ENGINE_ID_DEC_1, - GAUDI2_DCORE1_ENGINE_ID_EDMA_0, - GAUDI2_DCORE1_ENGINE_ID_EDMA_1, - GAUDI2_DCORE1_ENGINE_ID_MME, - GAUDI2_DCORE1_ENGINE_ID_TPC_0, - GAUDI2_DCORE1_ENGINE_ID_TPC_1, - GAUDI2_DCORE1_ENGINE_ID_TPC_2, - GAUDI2_DCORE1_ENGINE_ID_TPC_3, - GAUDI2_DCORE1_ENGINE_ID_TPC_4, - GAUDI2_DCORE1_ENGINE_ID_TPC_5, - GAUDI2_DCORE1_ENGINE_ID_DEC_0, - GAUDI2_DCORE1_ENGINE_ID_DEC_1, - GAUDI2_DCORE2_ENGINE_ID_EDMA_0, - GAUDI2_DCORE2_ENGINE_ID_EDMA_1, - GAUDI2_DCORE2_ENGINE_ID_MME, - GAUDI2_DCORE2_ENGINE_ID_TPC_0, - GAUDI2_DCORE2_ENGINE_ID_TPC_1, - GAUDI2_DCORE2_ENGINE_ID_TPC_2, - GAUDI2_DCORE2_ENGINE_ID_TPC_3, - GAUDI2_DCORE2_ENGINE_ID_TPC_4, - GAUDI2_DCORE2_ENGINE_ID_TPC_5, - GAUDI2_DCORE2_ENGINE_ID_DEC_0, - GAUDI2_DCORE2_ENGINE_ID_DEC_1, - GAUDI2_DCORE3_ENGINE_ID_EDMA_0, - GAUDI2_DCORE3_ENGINE_ID_EDMA_1, - GAUDI2_DCORE3_ENGINE_ID_MME, - GAUDI2_DCORE3_ENGINE_ID_TPC_0, - GAUDI2_DCORE3_ENGINE_ID_TPC_1, - GAUDI2_DCORE3_ENGINE_ID_TPC_2, - GAUDI2_DCORE3_ENGINE_ID_TPC_3, - GAUDI2_DCORE3_ENGINE_ID_TPC_4, - GAUDI2_DCORE3_ENGINE_ID_TPC_5, - GAUDI2_DCORE3_ENGINE_ID_DEC_0, - GAUDI2_DCORE3_ENGINE_ID_DEC_1, - GAUDI2_DCORE0_ENGINE_ID_TPC_6, - GAUDI2_ENGINE_ID_PDMA_0, - GAUDI2_ENGINE_ID_PDMA_1, - GAUDI2_ENGINE_ID_ROT_0, - GAUDI2_ENGINE_ID_ROT_1, - GAUDI2_PCIE_ENGINE_ID_DEC_0, - GAUDI2_PCIE_ENGINE_ID_DEC_1, - GAUDI2_ENGINE_ID_NIC0_0, - GAUDI2_ENGINE_ID_NIC0_1, - GAUDI2_ENGINE_ID_NIC1_0, - GAUDI2_ENGINE_ID_NIC1_1, - GAUDI2_ENGINE_ID_NIC2_0, - GAUDI2_ENGINE_ID_NIC2_1, - GAUDI2_ENGINE_ID_NIC3_0, - GAUDI2_ENGINE_ID_NIC3_1, - GAUDI2_ENGINE_ID_NIC4_0, - GAUDI2_ENGINE_ID_NIC4_1, - GAUDI2_ENGINE_ID_NIC5_0, - GAUDI2_ENGINE_ID_NIC5_1, - GAUDI2_ENGINE_ID_NIC6_0, - GAUDI2_ENGINE_ID_NIC6_1, - GAUDI2_ENGINE_ID_NIC7_0, - GAUDI2_ENGINE_ID_NIC7_1, - GAUDI2_ENGINE_ID_NIC8_0, - GAUDI2_ENGINE_ID_NIC8_1, - GAUDI2_ENGINE_ID_NIC9_0, - GAUDI2_ENGINE_ID_NIC9_1, - GAUDI2_ENGINE_ID_NIC10_0, - GAUDI2_ENGINE_ID_NIC10_1, - GAUDI2_ENGINE_ID_NIC11_0, - GAUDI2_ENGINE_ID_NIC11_1, - GAUDI2_ENGINE_ID_PCIE, - GAUDI2_ENGINE_ID_PSOC, - GAUDI2_ENGINE_ID_ARC_FARM, - GAUDI2_ENGINE_ID_KDMA, - GAUDI2_ENGINE_ID_SIZE -}; - -/* - * ASIC specific PLL index - * - * Used to retrieve in frequency info of different IPs via - * HL_INFO_PLL_FREQUENCY under HL_IOCTL_INFO IOCTL. The enums need to be - * used as an index in struct hl_pll_frequency_info - */ - -enum hl_goya_pll_index { - HL_GOYA_CPU_PLL = 0, - HL_GOYA_IC_PLL, - HL_GOYA_MC_PLL, - HL_GOYA_MME_PLL, - HL_GOYA_PCI_PLL, - HL_GOYA_EMMC_PLL, - HL_GOYA_TPC_PLL, - HL_GOYA_PLL_MAX -}; - -enum hl_gaudi_pll_index { - HL_GAUDI_CPU_PLL = 0, - HL_GAUDI_PCI_PLL, - HL_GAUDI_SRAM_PLL, - HL_GAUDI_HBM_PLL, - HL_GAUDI_NIC_PLL, - HL_GAUDI_DMA_PLL, - HL_GAUDI_MESH_PLL, - HL_GAUDI_MME_PLL, - HL_GAUDI_TPC_PLL, - HL_GAUDI_IF_PLL, - HL_GAUDI_PLL_MAX -}; - -enum hl_gaudi2_pll_index { - HL_GAUDI2_CPU_PLL = 0, - HL_GAUDI2_PCI_PLL, - HL_GAUDI2_SRAM_PLL, - HL_GAUDI2_HBM_PLL, - HL_GAUDI2_NIC_PLL, - HL_GAUDI2_DMA_PLL, - HL_GAUDI2_MESH_PLL, - HL_GAUDI2_MME_PLL, - HL_GAUDI2_TPC_PLL, - HL_GAUDI2_IF_PLL, - HL_GAUDI2_VID_PLL, - HL_GAUDI2_MSS_PLL, - HL_GAUDI2_PLL_MAX -}; - -/** - * enum hl_goya_dma_direction - Direction of DMA operation inside a LIN_DMA packet that is - * submitted to the GOYA's DMA QMAN. This attribute is not relevant - * to the H/W but the kernel driver use it to parse the packet's - * addresses and patch/validate them. - * @HL_DMA_HOST_TO_DRAM: DMA operation from Host memory to GOYA's DDR. - * @HL_DMA_HOST_TO_SRAM: DMA operation from Host memory to GOYA's SRAM. - * @HL_DMA_DRAM_TO_SRAM: DMA operation from GOYA's DDR to GOYA's SRAM. - * @HL_DMA_SRAM_TO_DRAM: DMA operation from GOYA's SRAM to GOYA's DDR. - * @HL_DMA_SRAM_TO_HOST: DMA operation from GOYA's SRAM to Host memory. - * @HL_DMA_DRAM_TO_HOST: DMA operation from GOYA's DDR to Host memory. - * @HL_DMA_DRAM_TO_DRAM: DMA operation from GOYA's DDR to GOYA's DDR. - * @HL_DMA_SRAM_TO_SRAM: DMA operation from GOYA's SRAM to GOYA's SRAM. - * @HL_DMA_ENUM_MAX: number of values in enum - */ -enum hl_goya_dma_direction { - HL_DMA_HOST_TO_DRAM, - HL_DMA_HOST_TO_SRAM, - HL_DMA_DRAM_TO_SRAM, - HL_DMA_SRAM_TO_DRAM, - HL_DMA_SRAM_TO_HOST, - HL_DMA_DRAM_TO_HOST, - HL_DMA_DRAM_TO_DRAM, - HL_DMA_SRAM_TO_SRAM, - HL_DMA_ENUM_MAX -}; - -/** - * enum hl_device_status - Device status information. - * @HL_DEVICE_STATUS_OPERATIONAL: Device is operational. - * @HL_DEVICE_STATUS_IN_RESET: Device is currently during reset. - * @HL_DEVICE_STATUS_MALFUNCTION: Device is unusable. - * @HL_DEVICE_STATUS_NEEDS_RESET: Device needs reset because auto reset was disabled. - * @HL_DEVICE_STATUS_IN_DEVICE_CREATION: Device is operational but its creation is still in - * progress. - * @HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE: Device is currently during reset that was - * triggered because the user released the device - * @HL_DEVICE_STATUS_LAST: Last status. - */ -enum hl_device_status { - HL_DEVICE_STATUS_OPERATIONAL, - HL_DEVICE_STATUS_IN_RESET, - HL_DEVICE_STATUS_MALFUNCTION, - HL_DEVICE_STATUS_NEEDS_RESET, - HL_DEVICE_STATUS_IN_DEVICE_CREATION, - HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE, - HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_RESET_AFTER_DEVICE_RELEASE -}; - -enum hl_server_type { - HL_SERVER_TYPE_UNKNOWN = 0, - HL_SERVER_GAUDI_HLS1 = 1, - HL_SERVER_GAUDI_HLS1H = 2, - HL_SERVER_GAUDI_TYPE1 = 3, - HL_SERVER_GAUDI_TYPE2 = 4, - HL_SERVER_GAUDI2_HLS2 = 5 -}; - -/* - * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command - * - * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event - * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code - * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset - * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error - * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable - * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state - * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error - * HL_NOTIFIER_EVENT_RAZWI - Indicates razwi happened - * HL_NOTIFIER_EVENT_PAGE_FAULT - Indicates page fault happened - */ -#define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) -#define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) -#define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) -#define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) -#define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) -#define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) -#define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) -#define HL_NOTIFIER_EVENT_RAZWI (1ULL << 7) -#define HL_NOTIFIER_EVENT_PAGE_FAULT (1ULL << 8) - -/* Opcode for management ioctl - * - * HW_IP_INFO - Receive information about different IP blocks in the - * device. - * HL_INFO_HW_EVENTS - Receive an array describing how many times each event - * occurred since the last hard reset. - * HL_INFO_DRAM_USAGE - Retrieve the dram usage inside the device and of the - * specific context. This is relevant only for devices - * where the dram is managed by the kernel driver - * HL_INFO_HW_IDLE - Retrieve information about the idle status of each - * internal engine. - * HL_INFO_DEVICE_STATUS - Retrieve the device's status. This opcode doesn't - * require an open context. - * HL_INFO_DEVICE_UTILIZATION - Retrieve the total utilization of the device - * over the last period specified by the user. - * The period can be between 100ms to 1s, in - * resolution of 100ms. The return value is a - * percentage of the utilization rate. - * HL_INFO_HW_EVENTS_AGGREGATE - Receive an array describing how many times each - * event occurred since the driver was loaded. - * HL_INFO_CLK_RATE - Retrieve the current and maximum clock rate - * of the device in MHz. The maximum clock rate is - * configurable via sysfs parameter - * HL_INFO_RESET_COUNT - Retrieve the counts of the soft and hard reset - * operations performed on the device since the last - * time the driver was loaded. - * HL_INFO_TIME_SYNC - Retrieve the device's time alongside the host's time - * for synchronization. - * HL_INFO_CS_COUNTERS - Retrieve command submission counters - * HL_INFO_PCI_COUNTERS - Retrieve PCI counters - * HL_INFO_CLK_THROTTLE_REASON - Retrieve clock throttling reason - * HL_INFO_SYNC_MANAGER - Retrieve sync manager info per dcore - * HL_INFO_TOTAL_ENERGY - Retrieve total energy consumption - * HL_INFO_PLL_FREQUENCY - Retrieve PLL frequency - * HL_INFO_POWER - Retrieve power information - * HL_INFO_OPEN_STATS - Retrieve info regarding recent device open calls - * HL_INFO_DRAM_REPLACED_ROWS - Retrieve DRAM replaced rows info - * HL_INFO_DRAM_PENDING_ROWS - Retrieve DRAM pending rows num - * HL_INFO_LAST_ERR_OPEN_DEV_TIME - Retrieve timestamp of the last time the device was opened - * and CS timeout or razwi error occurred. - * HL_INFO_CS_TIMEOUT_EVENT - Retrieve CS timeout timestamp and its related CS sequence number. - * HL_INFO_RAZWI_EVENT - Retrieve parameters of razwi: - * Timestamp of razwi. - * The address which accessing it caused the razwi. - * Razwi initiator. - * Razwi cause, was it a page fault or MMU access error. - * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation - * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. - * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. - * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd - * HL_INFO_GET_EVENTS - Retrieve the last occurred events - * HL_INFO_UNDEFINED_OPCODE_EVENT - Retrieve last undefined opcode error information. - * HL_INFO_ENGINE_STATUS - Retrieve the status of all the h/w engines in the asic. - * HL_INFO_PAGE_FAULT_EVENT - Retrieve parameters of captured page fault. - * HL_INFO_USER_MAPPINGS - Retrieve user mappings, captured after page fault event. - * HL_INFO_FW_GENERIC_REQ - Send generic request to FW. - */ -#define HL_INFO_HW_IP_INFO 0 -#define HL_INFO_HW_EVENTS 1 -#define HL_INFO_DRAM_USAGE 2 -#define HL_INFO_HW_IDLE 3 -#define HL_INFO_DEVICE_STATUS 4 -#define HL_INFO_DEVICE_UTILIZATION 6 -#define HL_INFO_HW_EVENTS_AGGREGATE 7 -#define HL_INFO_CLK_RATE 8 -#define HL_INFO_RESET_COUNT 9 -#define HL_INFO_TIME_SYNC 10 -#define HL_INFO_CS_COUNTERS 11 -#define HL_INFO_PCI_COUNTERS 12 -#define HL_INFO_CLK_THROTTLE_REASON 13 -#define HL_INFO_SYNC_MANAGER 14 -#define HL_INFO_TOTAL_ENERGY 15 -#define HL_INFO_PLL_FREQUENCY 16 -#define HL_INFO_POWER 17 -#define HL_INFO_OPEN_STATS 18 -#define HL_INFO_DRAM_REPLACED_ROWS 21 -#define HL_INFO_DRAM_PENDING_ROWS 22 -#define HL_INFO_LAST_ERR_OPEN_DEV_TIME 23 -#define HL_INFO_CS_TIMEOUT_EVENT 24 -#define HL_INFO_RAZWI_EVENT 25 -#define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 -#define HL_INFO_SECURED_ATTESTATION 27 -#define HL_INFO_REGISTER_EVENTFD 28 -#define HL_INFO_UNREGISTER_EVENTFD 29 -#define HL_INFO_GET_EVENTS 30 -#define HL_INFO_UNDEFINED_OPCODE_EVENT 31 -#define HL_INFO_ENGINE_STATUS 32 -#define HL_INFO_PAGE_FAULT_EVENT 33 -#define HL_INFO_USER_MAPPINGS 34 -#define HL_INFO_FW_GENERIC_REQ 35 - -#define HL_INFO_VERSION_MAX_LEN 128 -#define HL_INFO_CARD_NAME_MAX_LEN 16 - -/* Maximum buffer size for retrieving engines status */ -#define HL_ENGINES_DATA_MAX_SIZE SZ_1M - -/** - * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC - * @sram_base_address: The first SRAM physical base address that is free to be - * used by the user. - * @dram_base_address: The first DRAM virtual or physical base address that is - * free to be used by the user. - * @dram_size: The DRAM size that is available to the user. - * @sram_size: The SRAM size that is available to the user. - * @num_of_events: The number of events that can be received from the f/w. This - * is needed so the user can what is the size of the h/w events - * array he needs to pass to the kernel when he wants to fetch - * the event counters. - * @device_id: PCI device ID of the ASIC. - * @module_id: Module ID of the ASIC for mezzanine cards in servers - * (From OCP spec). - * @decoder_enabled_mask: Bit-mask that represents which decoders are enabled. - * @first_available_interrupt_id: The first available interrupt ID for the user - * to be used when it works with user interrupts. - * Relevant for Gaudi2 and later. - * @server_type: Server type that the Gaudi ASIC is currently installed in. - * The value is according to enum hl_server_type - * @cpld_version: CPLD version on the board. - * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs. - * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs. - * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs. - * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler - * in some ASICs. - * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant - * for Goya/Gaudi only. - * @dram_enabled: Whether the DRAM is enabled. - * @security_enabled: Whether security is enabled on device. - * @mme_master_slave_mode: Indicate whether the MME is working in master/slave - * configuration. Relevant for Greco and later. - * @cpucp_version: The CPUCP f/w version. - * @card_name: The card name as passed by the f/w. - * @tpc_enabled_mask_ext: Bit-mask that represents which TPCs are enabled. - * Relevant for Greco and later. - * @dram_page_size: The DRAM physical page size. - * @edma_enabled_mask: Bit-mask that represents which EDMAs are enabled. - * Relevant for Gaudi2 and later. - * @number_of_user_interrupts: The number of interrupts that are available to the userspace - * application to use. Relevant for Gaudi2 and later. - * @device_mem_alloc_default_page_size: default page size used in device memory allocation. - * @revision_id: PCI revision ID of the ASIC. - */ -struct hl_info_hw_ip_info { - __u64 sram_base_address; - __u64 dram_base_address; - __u64 dram_size; - __u32 sram_size; - __u32 num_of_events; - __u32 device_id; - __u32 module_id; - __u32 decoder_enabled_mask; - __u16 first_available_interrupt_id; - __u16 server_type; - __u32 cpld_version; - __u32 psoc_pci_pll_nr; - __u32 psoc_pci_pll_nf; - __u32 psoc_pci_pll_od; - __u32 psoc_pci_pll_div_factor; - __u8 tpc_enabled_mask; - __u8 dram_enabled; - __u8 security_enabled; - __u8 mme_master_slave_mode; - __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; - __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; - __u64 tpc_enabled_mask_ext; - __u64 dram_page_size; - __u32 edma_enabled_mask; - __u16 number_of_user_interrupts; - __u16 pad2; - __u64 reserved4; - __u64 device_mem_alloc_default_page_size; - __u64 reserved5; - __u64 reserved6; - __u32 reserved7; - __u8 reserved8; - __u8 revision_id; - __u8 pad[2]; -}; - -struct hl_info_dram_usage { - __u64 dram_free_mem; - __u64 ctx_dram_mem; -}; - -#define HL_BUSY_ENGINES_MASK_EXT_SIZE 4 - -struct hl_info_hw_idle { - __u32 is_idle; - /* - * Bitmask of busy engines. - * Bits definition is according to `enum _engine_id'. - */ - __u32 busy_engines_mask; - - /* - * Extended Bitmask of busy engines. - * Bits definition is according to `enum _engine_id'. - */ - __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; -}; - -struct hl_info_device_status { - __u32 status; - __u32 pad; -}; - -struct hl_info_device_utilization { - __u32 utilization; - __u32 pad; -}; - -struct hl_info_clk_rate { - __u32 cur_clk_rate_mhz; - __u32 max_clk_rate_mhz; -}; - -struct hl_info_reset_count { - __u32 hard_reset_cnt; - __u32 soft_reset_cnt; -}; - -struct hl_info_time_sync { - __u64 device_time; - __u64 host_time; -}; - -/** - * struct hl_info_pci_counters - pci counters - * @rx_throughput: PCI rx throughput KBps - * @tx_throughput: PCI tx throughput KBps - * @replay_cnt: PCI replay counter - */ -struct hl_info_pci_counters { - __u64 rx_throughput; - __u64 tx_throughput; - __u64 replay_cnt; -}; - -enum hl_clk_throttling_type { - HL_CLK_THROTTLE_TYPE_POWER, - HL_CLK_THROTTLE_TYPE_THERMAL, - HL_CLK_THROTTLE_TYPE_MAX -}; - -/* clk_throttling_reason masks */ -#define HL_CLK_THROTTLE_POWER (1 << HL_CLK_THROTTLE_TYPE_POWER) -#define HL_CLK_THROTTLE_THERMAL (1 << HL_CLK_THROTTLE_TYPE_THERMAL) - -/** - * struct hl_info_clk_throttle - clock throttling reason - * @clk_throttling_reason: each bit represents a clk throttling reason - * @clk_throttling_timestamp_us: represents CPU timestamp in microseconds of the start-event - * @clk_throttling_duration_ns: the clock throttle time in nanosec - */ -struct hl_info_clk_throttle { - __u32 clk_throttling_reason; - __u32 pad; - __u64 clk_throttling_timestamp_us[HL_CLK_THROTTLE_TYPE_MAX]; - __u64 clk_throttling_duration_ns[HL_CLK_THROTTLE_TYPE_MAX]; -}; - -/** - * struct hl_info_energy - device energy information - * @total_energy_consumption: total device energy consumption - */ -struct hl_info_energy { - __u64 total_energy_consumption; -}; - -#define HL_PLL_NUM_OUTPUTS 4 - -struct hl_pll_frequency_info { - __u16 output[HL_PLL_NUM_OUTPUTS]; -}; - -/** - * struct hl_open_stats_info - device open statistics information - * @open_counter: ever growing counter, increased on each successful dev open - * @last_open_period_ms: duration (ms) device was open last time - * @is_compute_ctx_active: Whether there is an active compute context executing - * @compute_ctx_in_release: true if the current compute context is being released - */ -struct hl_open_stats_info { - __u64 open_counter; - __u64 last_open_period_ms; - __u8 is_compute_ctx_active; - __u8 compute_ctx_in_release; - __u8 pad[6]; -}; - -/** - * struct hl_power_info - power information - * @power: power consumption - */ -struct hl_power_info { - __u64 power; -}; - -/** - * struct hl_info_sync_manager - sync manager information - * @first_available_sync_object: first available sob - * @first_available_monitor: first available monitor - * @first_available_cq: first available cq - */ -struct hl_info_sync_manager { - __u32 first_available_sync_object; - __u32 first_available_monitor; - __u32 first_available_cq; - __u32 reserved; -}; - -/** - * struct hl_info_cs_counters - command submission counters - * @total_out_of_mem_drop_cnt: total dropped due to memory allocation issue - * @ctx_out_of_mem_drop_cnt: context dropped due to memory allocation issue - * @total_parsing_drop_cnt: total dropped due to error in packet parsing - * @ctx_parsing_drop_cnt: context dropped due to error in packet parsing - * @total_queue_full_drop_cnt: total dropped due to queue full - * @ctx_queue_full_drop_cnt: context dropped due to queue full - * @total_device_in_reset_drop_cnt: total dropped due to device in reset - * @ctx_device_in_reset_drop_cnt: context dropped due to device in reset - * @total_max_cs_in_flight_drop_cnt: total dropped due to maximum CS in-flight - * @ctx_max_cs_in_flight_drop_cnt: context dropped due to maximum CS in-flight - * @total_validation_drop_cnt: total dropped due to validation error - * @ctx_validation_drop_cnt: context dropped due to validation error - */ -struct hl_info_cs_counters { - __u64 total_out_of_mem_drop_cnt; - __u64 ctx_out_of_mem_drop_cnt; - __u64 total_parsing_drop_cnt; - __u64 ctx_parsing_drop_cnt; - __u64 total_queue_full_drop_cnt; - __u64 ctx_queue_full_drop_cnt; - __u64 total_device_in_reset_drop_cnt; - __u64 ctx_device_in_reset_drop_cnt; - __u64 total_max_cs_in_flight_drop_cnt; - __u64 ctx_max_cs_in_flight_drop_cnt; - __u64 total_validation_drop_cnt; - __u64 ctx_validation_drop_cnt; -}; - -/** - * struct hl_info_last_err_open_dev_time - last error boot information. - * @timestamp: timestamp of last time the device was opened and error occurred. - */ -struct hl_info_last_err_open_dev_time { - __s64 timestamp; -}; - -/** - * struct hl_info_cs_timeout_event - last CS timeout information. - * @timestamp: timestamp when last CS timeout event occurred. - * @seq: sequence number of last CS timeout event. - */ -struct hl_info_cs_timeout_event { - __s64 timestamp; - __u64 seq; -}; - -#define HL_RAZWI_NA_ENG_ID U16_MAX -#define HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR 128 -#define HL_RAZWI_READ BIT(0) -#define HL_RAZWI_WRITE BIT(1) -#define HL_RAZWI_LBW BIT(2) -#define HL_RAZWI_HBW BIT(3) -#define HL_RAZWI_RR BIT(4) -#define HL_RAZWI_ADDR_DEC BIT(5) - -/** - * struct hl_info_razwi_event - razwi information. - * @timestamp: timestamp of razwi. - * @addr: address which accessing it caused razwi. - * @engine_id: engine id of the razwi initiator, if it was initiated by engine that does not - * have engine id it will be set to HL_RAZWI_NA_ENG_ID. If there are several possible - * engines which caused the razwi, it will hold all of them. - * @num_of_possible_engines: contains number of possible engine ids. In some asics, razwi indication - * might be common for several engines and there is no way to get the - * exact engine. In this way, engine_id array will be filled with all - * possible engines caused this razwi. Also, there might be possibility - * in gaudi, where we don't indication on specific engine, in that case - * the value of this parameter will be zero. - * @flags: bitmask for additional data: HL_RAZWI_READ - razwi caused by read operation - * HL_RAZWI_WRITE - razwi caused by write operation - * HL_RAZWI_LBW - razwi caused by lbw fabric transaction - * HL_RAZWI_HBW - razwi caused by hbw fabric transaction - * HL_RAZWI_RR - razwi caused by range register - * HL_RAZWI_ADDR_DEC - razwi caused by address decode error - * Note: this data is not supported by all asics, in that case the relevant bits will not - * be set. - */ -struct hl_info_razwi_event { - __s64 timestamp; - __u64 addr; - __u16 engine_id[HL_RAZWI_MAX_NUM_OF_ENGINES_PER_RTR]; - __u16 num_of_possible_engines; - __u8 flags; - __u8 pad[5]; -}; - -#define MAX_QMAN_STREAMS_INFO 4 -#define OPCODE_INFO_MAX_ADDR_SIZE 8 -/** - * struct hl_info_undefined_opcode_event - info about last undefined opcode error - * @timestamp: timestamp of the undefined opcode error - * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ - * entries. In case all streams array entries are - * filled with values, it means the execution was in Lower-CP. - * @cq_addr: the address of the current handled command buffer - * @cq_size: the size of the current handled command buffer - * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. - * should be equal to 1 in case of undefined opcode - * in Upper-CP (specific stream) and equal to 4 incase - * of undefined opcode in Lower-CP. - * @engine_id: engine-id that the error occurred on - * @stream_id: the stream id the error occurred on. In case the stream equals to - * MAX_QMAN_STREAMS_INFO it means the error occurred on a Lower-CP. - */ -struct hl_info_undefined_opcode_event { - __s64 timestamp; - __u64 cb_addr_streams[MAX_QMAN_STREAMS_INFO][OPCODE_INFO_MAX_ADDR_SIZE]; - __u64 cq_addr; - __u32 cq_size; - __u32 cb_addr_streams_len; - __u32 engine_id; - __u32 stream_id; -}; - -/** - * struct hl_info_dev_memalloc_page_sizes - valid page sizes in device mem alloc information. - * @page_order_bitmask: bitmap in which a set bit represents the order of the supported page size - * (e.g. 0x2100000 means that 1MB and 32MB pages are supported). - */ -struct hl_info_dev_memalloc_page_sizes { - __u64 page_order_bitmask; -}; - -#define SEC_PCR_DATA_BUF_SZ 256 -#define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ -#define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ -#define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ - -/* - * struct hl_info_sec_attest - attestation report of the boot - * @nonce: number only used once. random number provided by host. this also passed to the quote - * command as a qualifying data. - * @pcr_quote_len: length of the attestation quote data (bytes) - * @pub_data_len: length of the public data (bytes) - * @certificate_len: length of the certificate (bytes) - * @pcr_num_reg: number of PCR registers in the pcr_data array - * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) - * @quote_sig_len: length of the attestation report signature (bytes) - * @pcr_data: raw values of the PCR registers - * @pcr_quote: attestation report data structure - * @quote_sig: signature structure of the attestation report - * @public_data: public key for the signed attestation - * (outPublic + name + qualifiedName) - * @certificate: certificate for the attestation signing key - */ -struct hl_info_sec_attest { - __u32 nonce; - __u16 pcr_quote_len; - __u16 pub_data_len; - __u16 certificate_len; - __u8 pcr_num_reg; - __u8 pcr_reg_len; - __u8 quote_sig_len; - __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; - __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; - __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; - __u8 public_data[SEC_PUB_DATA_BUF_SZ]; - __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; - __u8 pad0[2]; -}; - -/** - * struct hl_page_fault_info - page fault information. - * @timestamp: timestamp of page fault. - * @addr: address which accessing it caused page fault. - * @engine_id: engine id which caused the page fault, supported only in gaudi3. - */ -struct hl_page_fault_info { - __s64 timestamp; - __u64 addr; - __u16 engine_id; - __u8 pad[6]; -}; - -/** - * struct hl_user_mapping - user mapping information. - * @dev_va: device virtual address. - * @size: virtual address mapping size. - */ -struct hl_user_mapping { - __u64 dev_va; - __u64 size; -}; - -enum gaudi_dcores { - HL_GAUDI_WS_DCORE, - HL_GAUDI_WN_DCORE, - HL_GAUDI_EN_DCORE, - HL_GAUDI_ES_DCORE -}; - -/** - * struct hl_info_args - Main structure to retrieve device related information. - * @return_pointer: User space address of the relevant structure related to HL_INFO_* operation - * mentioned in @op. - * @return_size: Size of the structure used in @return_pointer, just like "size" in "snprintf", it - * limits how many bytes the kernel can write. For hw_events array, the size should be - * hl_info_hw_ip_info.num_of_events * sizeof(__u32). - * @op: Defines which type of information to be retrieved. Refer HL_INFO_* for details. - * @dcore_id: DCORE id for which the information is relevant (for Gaudi refer to enum gaudi_dcores). - * @ctx_id: Context ID of the user. Currently not in use. - * @period_ms: Period value, in milliseconds, for utilization rate in range 100ms - 1000ms in 100 ms - * resolution. Currently not in use. - * @pll_index: Index as defined in hl__pll_index enumeration. - * @eventfd: event file descriptor for event notifications. - * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the - * driver. It is possible for the user to allocate buffer larger than - * needed, hence updating this variable so user will know the exact amount - * of bytes copied by the kernel to the buffer. - * @sec_attest_nonce: Nonce number used for attestation report. - * @array_size: Number of array members copied to user buffer. - * Relevant for HL_INFO_USER_MAPPINGS info ioctl. - * @fw_sub_opcode: generic requests sub opcodes. - * @pad: Padding to 64 bit. - */ -struct hl_info_args { - __u64 return_pointer; - __u32 return_size; - __u32 op; - - union { - __u32 dcore_id; - __u32 ctx_id; - __u32 period_ms; - __u32 pll_index; - __u32 eventfd; - __u32 user_buffer_actual_size; - __u32 sec_attest_nonce; - __u32 array_size; - __u32 fw_sub_opcode; - }; - - __u32 pad; -}; - -/* Opcode to create a new command buffer */ -#define HL_CB_OP_CREATE 0 -/* Opcode to destroy previously created command buffer */ -#define HL_CB_OP_DESTROY 1 -/* Opcode to retrieve information about a command buffer */ -#define HL_CB_OP_INFO 2 - -/* 2MB minus 32 bytes for 2xMSG_PROT */ -#define HL_MAX_CB_SIZE (0x200000 - 32) - -/* Indicates whether the command buffer should be mapped to the device's MMU */ -#define HL_CB_FLAGS_MAP 0x1 - -/* Used with HL_CB_OP_INFO opcode to get the device va address for kernel mapped CB */ -#define HL_CB_FLAGS_GET_DEVICE_VA 0x2 - -struct hl_cb_in { - /* Handle of CB or 0 if we want to create one */ - __u64 cb_handle; - /* HL_CB_OP_* */ - __u32 op; - - /* Size of CB. Maximum size is HL_MAX_CB_SIZE. The minimum size that - * will be allocated, regardless of this parameter's value, is PAGE_SIZE - */ - __u32 cb_size; - - /* Context ID - Currently not in use */ - __u32 ctx_id; - /* HL_CB_FLAGS_* */ - __u32 flags; -}; - -struct hl_cb_out { - union { - /* Handle of CB */ - __u64 cb_handle; - - union { - /* Information about CB */ - struct { - /* Usage count of CB */ - __u32 usage_cnt; - __u32 pad; - }; - - /* CB mapped address to device MMU */ - __u64 device_va; - }; - }; -}; - -union hl_cb_args { - struct hl_cb_in in; - struct hl_cb_out out; -}; - -/* HL_CS_CHUNK_FLAGS_ values - * - * HL_CS_CHUNK_FLAGS_USER_ALLOC_CB: - * Indicates if the CB was allocated and mapped by userspace - * (relevant to greco and above). User allocated CB is a command buffer, - * allocated by the user, via malloc (or similar). After allocating the - * CB, the user invokes - “memory ioctl” to map the user memory into a - * device virtual address. The user provides this address via the - * cb_handle field. The interface provides the ability to create a - * large CBs, Which aren’t limited to “HL_MAX_CB_SIZE”. Therefore, it - * increases the PCI-DMA queues throughput. This CB allocation method - * also reduces the use of Linux DMA-able memory pool. Which are limited - * and used by other Linux sub-systems. - */ -#define HL_CS_CHUNK_FLAGS_USER_ALLOC_CB 0x1 - -/* - * This structure size must always be fixed to 64-bytes for backward - * compatibility - */ -struct hl_cs_chunk { - union { - /* Goya/Gaudi: - * For external queue, this represents a Handle of CB on the - * Host. - * For internal queue in Goya, this represents an SRAM or - * a DRAM address of the internal CB. In Gaudi, this might also - * represent a mapped host address of the CB. - * - * Greco onwards: - * For H/W queue, this represents either a Handle of CB on the - * Host, or an SRAM, a DRAM, or a mapped host address of the CB. - * - * A mapped host address is in the device address space, after - * a host address was mapped by the device MMU. - */ - __u64 cb_handle; - - /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set - * This holds address of array of u64 values that contain - * signal CS sequence numbers. The wait described by - * this job will listen on all those signals - * (wait event per signal) - */ - __u64 signal_seq_arr; - - /* - * Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set - * along with HL_CS_FLAGS_ENCAP_SIGNALS. - * This is the CS sequence which has the encapsulated signals. - */ - __u64 encaps_signal_seq; - }; - - /* Index of queue to put the CB on */ - __u32 queue_index; - - union { - /* - * Size of command buffer with valid packets - * Can be smaller then actual CB size - */ - __u32 cb_size; - - /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set. - * Number of entries in signal_seq_arr - */ - __u32 num_signal_seq_arr; - - /* Relevant only when HL_CS_FLAGS_WAIT or - * HL_CS_FLAGS_COLLECTIVE_WAIT is set along - * with HL_CS_FLAGS_ENCAP_SIGNALS - * This set the signals range that the user want to wait for - * out of the whole reserved signals range. - * e.g if the signals range is 20, and user don't want - * to wait for signal 8, so he set this offset to 7, then - * he call the API again with 9 and so on till 20. - */ - __u32 encaps_signal_offset; - }; - - /* HL_CS_CHUNK_FLAGS_* */ - __u32 cs_chunk_flags; - - /* Relevant only when HL_CS_FLAGS_COLLECTIVE_WAIT is set. - * This holds the collective engine ID. The wait described by this job - * will sync with this engine and with all NICs before completion. - */ - __u32 collective_engine_id; - - /* Align structure to 64 bytes */ - __u32 pad[10]; -}; - -/* SIGNAL/WAIT/COLLECTIVE_WAIT flags are mutually exclusive */ -#define HL_CS_FLAGS_FORCE_RESTORE 0x1 -#define HL_CS_FLAGS_SIGNAL 0x2 -#define HL_CS_FLAGS_WAIT 0x4 -#define HL_CS_FLAGS_COLLECTIVE_WAIT 0x8 - -#define HL_CS_FLAGS_TIMESTAMP 0x20 -#define HL_CS_FLAGS_STAGED_SUBMISSION 0x40 -#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST 0x80 -#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST 0x100 -#define HL_CS_FLAGS_CUSTOM_TIMEOUT 0x200 -#define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT 0x400 - -/* - * The encapsulated signals CS is merged into the existing CS ioctls. - * In order to use this feature need to follow the below procedure: - * 1. Reserve signals, set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY - * the output of this API will be the SOB offset from CFG_BASE. - * this address will be used to patch CB cmds to do the signaling for this - * SOB by incrementing it's value. - * for reverting the reservation use HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY - * CS type, note that this might fail if out-of-sync happened to the SOB - * value, in case other signaling request to the same SOB occurred between - * reserve-unreserve calls. - * 2. Use the staged CS to do the encapsulated signaling jobs. - * use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST - * along with HL_CS_FLAGS_ENCAP_SIGNALS flag, and set encaps_signal_offset - * field. This offset allows app to wait on part of the reserved signals. - * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag - * to wait for the encapsulated signals. - */ -#define HL_CS_FLAGS_ENCAP_SIGNALS 0x800 -#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 -#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 - -/* - * The engine cores CS is merged into the existing CS ioctls. - * Use it to control the engine cores mode. - */ -#define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 - -#define HL_CS_STATUS_SUCCESS 0 - -#define HL_MAX_JOBS_PER_CS 512 - -/* HL_ENGINE_CORE_ values - * - * HL_ENGINE_CORE_HALT: engine core halt - * HL_ENGINE_CORE_RUN: engine core run - */ -#define HL_ENGINE_CORE_HALT (1 << 0) -#define HL_ENGINE_CORE_RUN (1 << 1) - -struct hl_cs_in { - - union { - struct { - /* this holds address of array of hl_cs_chunk for restore phase */ - __u64 chunks_restore; - - /* holds address of array of hl_cs_chunk for execution phase */ - __u64 chunks_execute; - }; - - /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ - struct { - /* this holds address of array of uint32 for engine_cores */ - __u64 engine_cores; - - /* number of engine cores in engine_cores array */ - __u32 num_engine_cores; - - /* the core command to be sent towards engine cores */ - __u32 core_command; - }; - }; - - union { - /* - * Sequence number of a staged submission CS - * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and - * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset. - */ - __u64 seq; - - /* - * Encapsulated signals handle id - * Valid for two flows: - * 1. CS with encapsulated signals: - * when HL_CS_FLAGS_STAGED_SUBMISSION and - * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST - * and HL_CS_FLAGS_ENCAP_SIGNALS are set. - * 2. unreserve signals: - * valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set. - */ - __u32 encaps_sig_handle_id; - - /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ - struct { - /* Encapsulated signals number */ - __u32 encaps_signals_count; - - /* Encapsulated signals queue index (stream) */ - __u32 encaps_signals_q_idx; - }; - }; - - /* Number of chunks in restore phase array. Maximum number is - * HL_MAX_JOBS_PER_CS - */ - __u32 num_chunks_restore; - - /* Number of chunks in execution array. Maximum number is - * HL_MAX_JOBS_PER_CS - */ - __u32 num_chunks_execute; - - /* timeout in seconds - valid only if HL_CS_FLAGS_CUSTOM_TIMEOUT - * is set - */ - __u32 timeout; - - /* HL_CS_FLAGS_* */ - __u32 cs_flags; - - /* Context ID - Currently not in use */ - __u32 ctx_id; - __u8 pad[4]; -}; - -struct hl_cs_out { - union { - /* - * seq holds the sequence number of the CS to pass to wait - * ioctl. All values are valid except for 0 and ULLONG_MAX - */ - __u64 seq; - - /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ - struct { - /* This is the reserved signal handle id */ - __u32 handle_id; - - /* This is the signals count */ - __u32 count; - }; - }; - - /* HL_CS_STATUS */ - __u32 status; - - /* - * SOB base address offset - * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY or HL_CS_FLAGS_SIGNAL is set - */ - __u32 sob_base_addr_offset; - - /* - * Count of completed signals in SOB before current signal submission. - * Valid only when (HL_CS_FLAGS_ENCAP_SIGNALS & HL_CS_FLAGS_STAGED_SUBMISSION) - * or HL_CS_FLAGS_SIGNAL is set - */ - __u16 sob_count_before_submission; - __u16 pad[3]; -}; - -union hl_cs_args { - struct hl_cs_in in; - struct hl_cs_out out; -}; - -#define HL_WAIT_CS_FLAGS_INTERRUPT 0x2 -#define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000 -#define HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT 0xFFF00000 -#define HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT 0xFFE00000 -#define HL_WAIT_CS_FLAGS_MULTI_CS 0x4 -#define HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ 0x10 -#define HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT 0x20 - -#define HL_WAIT_MULTI_CS_LIST_MAX_LEN 32 - -struct hl_wait_cs_in { - union { - struct { - /* - * In case of wait_cs holds the CS sequence number. - * In case of wait for multi CS hold a user pointer to - * an array of CS sequence numbers - */ - __u64 seq; - /* Absolute timeout to wait for command submission - * in microseconds - */ - __u64 timeout_us; - }; - - struct { - union { - /* User address for completion comparison. - * upon interrupt, driver will compare the value pointed - * by this address with the supplied target value. - * in order not to perform any comparison, set address - * to all 1s. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 addr; - - /* cq_counters_handle to a kernel mapped cb which contains - * cq counters. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set - */ - __u64 cq_counters_handle; - }; - - /* Target value for completion comparison */ - __u64 target; - }; - }; - - /* Context ID - Currently not in use */ - __u32 ctx_id; - - /* HL_WAIT_CS_FLAGS_* - * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include - * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK - * - * in order to wait for any CQ interrupt, set interrupt value to - * HL_WAIT_CS_FLAGS_ANY_CQ_INTERRUPT. - * - * in order to wait for any decoder interrupt, set interrupt value to - * HL_WAIT_CS_FLAGS_ANY_DEC_INTERRUPT. - */ - __u32 flags; - - union { - struct { - /* Multi CS API info- valid entries in multi-CS array */ - __u8 seq_arr_len; - __u8 pad[7]; - }; - - /* Absolute timeout to wait for an interrupt in microseconds. - * Relevant only when HL_WAIT_CS_FLAGS_INTERRUPT is set - */ - __u64 interrupt_timeout_us; - }; - - /* - * cq counter offset inside the counters cb pointed by cq_counters_handle above. - * upon interrupt, driver will compare the value pointed - * by this address (cq_counters_handle + cq_counters_offset) - * with the supplied target value. - * relevant only when HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ is set - */ - __u64 cq_counters_offset; - - /* - * Timestamp_handle timestamps buffer handle. - * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set - */ - __u64 timestamp_handle; - - /* - * Timestamp_offset is offset inside the timestamp buffer pointed by timestamp_handle above. - * upon interrupt, if the cq reached the target value then driver will write - * timestamp to this offset. - * relevant only when HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT is set - */ - __u64 timestamp_offset; -}; - -#define HL_WAIT_CS_STATUS_COMPLETED 0 -#define HL_WAIT_CS_STATUS_BUSY 1 -#define HL_WAIT_CS_STATUS_TIMEDOUT 2 -#define HL_WAIT_CS_STATUS_ABORTED 3 - -#define HL_WAIT_CS_STATUS_FLAG_GONE 0x1 -#define HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD 0x2 - -struct hl_wait_cs_out { - /* HL_WAIT_CS_STATUS_* */ - __u32 status; - /* HL_WAIT_CS_STATUS_FLAG* */ - __u32 flags; - /* - * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set - * for wait_cs: timestamp of CS completion - * for wait_multi_cs: timestamp of FIRST CS completion - */ - __s64 timestamp_nsec; - /* multi CS completion bitmap */ - __u32 cs_completion_map; - __u32 pad; -}; - -union hl_wait_cs_args { - struct hl_wait_cs_in in; - struct hl_wait_cs_out out; -}; - -/* Opcode to allocate device memory */ -#define HL_MEM_OP_ALLOC 0 - -/* Opcode to free previously allocated device memory */ -#define HL_MEM_OP_FREE 1 - -/* Opcode to map host and device memory */ -#define HL_MEM_OP_MAP 2 - -/* Opcode to unmap previously mapped host and device memory */ -#define HL_MEM_OP_UNMAP 3 - -/* Opcode to map a hw block */ -#define HL_MEM_OP_MAP_BLOCK 4 - -/* Opcode to create DMA-BUF object for an existing device memory allocation - * and to export an FD of that DMA-BUF back to the caller - */ -#define HL_MEM_OP_EXPORT_DMABUF_FD 5 - -/* Opcode to create timestamps pool for user interrupts registration support - * The memory will be allocated by the kernel driver, A timestamp buffer which the user - * will get handle to it for mmap, and another internal buffer used by the - * driver for registration management - * The memory will be freed when the user closes the file descriptor(ctx close) - */ -#define HL_MEM_OP_TS_ALLOC 6 - -/* Memory flags */ -#define HL_MEM_CONTIGUOUS 0x1 -#define HL_MEM_SHARED 0x2 -#define HL_MEM_USERPTR 0x4 -#define HL_MEM_FORCE_HINT 0x8 -#define HL_MEM_PREFETCH 0x40 - -/** - * structure hl_mem_in - structure that handle input args for memory IOCTL - * @union arg: union of structures to be used based on the input operation - * @op: specify the requested memory operation (one of the HL_MEM_OP_* definitions). - * @flags: flags for the memory operation (one of the HL_MEM_* definitions). - * For the HL_MEM_OP_EXPORT_DMABUF_FD opcode, this field holds the DMA-BUF file/FD flags. - * @ctx_id: context ID - currently not in use. - * @num_of_elements: number of timestamp elements used only with HL_MEM_OP_TS_ALLOC opcode. - */ -struct hl_mem_in { - union { - /** - * structure for device memory allocation (used with the HL_MEM_OP_ALLOC op) - * @mem_size: memory size to allocate - * @page_size: page size to use on allocation. when the value is 0 the default page - * size will be taken. - */ - struct { - __u64 mem_size; - __u64 page_size; - } alloc; - - /** - * structure for free-ing device memory (used with the HL_MEM_OP_FREE op) - * @handle: handle returned from HL_MEM_OP_ALLOC - */ - struct { - __u64 handle; - } free; - - /** - * structure for mapping device memory (used with the HL_MEM_OP_MAP op) - * @hint_addr: requested virtual address of mapped memory. - * the driver will try to map the requested region to this hint - * address, as long as the address is valid and not already mapped. - * the user should check the returned address of the IOCTL to make - * sure he got the hint address. - * passing 0 here means that the driver will choose the address itself. - * @handle: handle returned from HL_MEM_OP_ALLOC. - */ - struct { - __u64 hint_addr; - __u64 handle; - } map_device; - - /** - * structure for mapping host memory (used with the HL_MEM_OP_MAP op) - * @host_virt_addr: address of allocated host memory. - * @hint_addr: requested virtual address of mapped memory. - * the driver will try to map the requested region to this hint - * address, as long as the address is valid and not already mapped. - * the user should check the returned address of the IOCTL to make - * sure he got the hint address. - * passing 0 here means that the driver will choose the address itself. - * @size: size of allocated host memory. - */ - struct { - __u64 host_virt_addr; - __u64 hint_addr; - __u64 mem_size; - } map_host; - - /** - * structure for mapping hw block (used with the HL_MEM_OP_MAP_BLOCK op) - * @block_addr:HW block address to map, a handle and size will be returned - * to the user and will be used to mmap the relevant block. - * only addresses from configuration space are allowed. - */ - struct { - __u64 block_addr; - } map_block; - - /** - * structure for unmapping host memory (used with the HL_MEM_OP_UNMAP op) - * @device_virt_addr: virtual address returned from HL_MEM_OP_MAP - */ - struct { - __u64 device_virt_addr; - } unmap; - - /** - * structure for exporting DMABUF object (used with - * the HL_MEM_OP_EXPORT_DMABUF_FD op) - * @addr: for Gaudi1, the driver expects a physical address - * inside the device's DRAM. this is because in Gaudi1 - * we don't have MMU that covers the device's DRAM. - * for all other ASICs, the driver expects a device - * virtual address that represents the start address of - * a mapped DRAM memory area inside the device. - * the address must be the same as was received from the - * driver during a previous HL_MEM_OP_MAP operation. - * @mem_size: size of memory to export. - * @offset: for Gaudi1, this value must be 0. For all other ASICs, - * the driver expects an offset inside of the memory area - * describe by addr. the offset represents the start - * address of that the exported dma-buf object describes. - */ - struct { - __u64 addr; - __u64 mem_size; - __u64 offset; - } export_dmabuf_fd; - }; - - __u32 op; - __u32 flags; - __u32 ctx_id; - __u32 num_of_elements; -}; - -struct hl_mem_out { - union { - /* - * Used for HL_MEM_OP_MAP as the virtual address that was - * assigned in the device VA space. - * A value of 0 means the requested operation failed. - */ - __u64 device_virt_addr; - - /* - * Used in HL_MEM_OP_ALLOC - * This is the assigned handle for the allocated memory - */ - __u64 handle; - - struct { - /* - * Used in HL_MEM_OP_MAP_BLOCK. - * This is the assigned handle for the mapped block - */ - __u64 block_handle; - - /* - * Used in HL_MEM_OP_MAP_BLOCK - * This is the size of the mapped block - */ - __u32 block_size; - - __u32 pad; - }; - - /* Returned in HL_MEM_OP_EXPORT_DMABUF_FD. Represents the - * DMA-BUF object that was created to describe a memory - * allocation on the device's memory space. The FD should be - * passed to the importer driver - */ - __s32 fd; - }; -}; - -union hl_mem_args { - struct hl_mem_in in; - struct hl_mem_out out; -}; - -#define HL_DEBUG_MAX_AUX_VALUES 10 - -struct hl_debug_params_etr { - /* Address in memory to allocate buffer */ - __u64 buffer_address; - - /* Size of buffer to allocate */ - __u64 buffer_size; - - /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ - __u32 sink_mode; - __u32 pad; -}; - -struct hl_debug_params_etf { - /* Address in memory to allocate buffer */ - __u64 buffer_address; - - /* Size of buffer to allocate */ - __u64 buffer_size; - - /* Sink operation mode: SW fifo, HW fifo, Circular buffer */ - __u32 sink_mode; - __u32 pad; -}; - -struct hl_debug_params_stm { - /* Two bit masks for HW event and Stimulus Port */ - __u64 he_mask; - __u64 sp_mask; - - /* Trace source ID */ - __u32 id; - - /* Frequency for the timestamp register */ - __u32 frequency; -}; - -struct hl_debug_params_bmon { - /* Two address ranges that the user can request to filter */ - __u64 start_addr0; - __u64 addr_mask0; - - __u64 start_addr1; - __u64 addr_mask1; - - /* Capture window configuration */ - __u32 bw_win; - __u32 win_capture; - - /* Trace source ID */ - __u32 id; - - /* Control register */ - __u32 control; - - /* Two more address ranges that the user can request to filter */ - __u64 start_addr2; - __u64 end_addr2; - - __u64 start_addr3; - __u64 end_addr3; -}; - -struct hl_debug_params_spmu { - /* Event types selection */ - __u64 event_types[HL_DEBUG_MAX_AUX_VALUES]; - - /* Number of event types selection */ - __u32 event_types_num; - - /* TRC configuration register values */ - __u32 pmtrc_val; - __u32 trc_ctrl_host_val; - __u32 trc_en_host_val; -}; - -/* Opcode for ETR component */ -#define HL_DEBUG_OP_ETR 0 -/* Opcode for ETF component */ -#define HL_DEBUG_OP_ETF 1 -/* Opcode for STM component */ -#define HL_DEBUG_OP_STM 2 -/* Opcode for FUNNEL component */ -#define HL_DEBUG_OP_FUNNEL 3 -/* Opcode for BMON component */ -#define HL_DEBUG_OP_BMON 4 -/* Opcode for SPMU component */ -#define HL_DEBUG_OP_SPMU 5 -/* Opcode for timestamp (deprecated) */ -#define HL_DEBUG_OP_TIMESTAMP 6 -/* Opcode for setting the device into or out of debug mode. The enable - * variable should be 1 for enabling debug mode and 0 for disabling it - */ -#define HL_DEBUG_OP_SET_MODE 7 - -struct hl_debug_args { - /* - * Pointer to user input structure. - * This field is relevant to specific opcodes. - */ - __u64 input_ptr; - /* Pointer to user output structure */ - __u64 output_ptr; - /* Size of user input structure */ - __u32 input_size; - /* Size of user output structure */ - __u32 output_size; - /* HL_DEBUG_OP_* */ - __u32 op; - /* - * Register index in the component, taken from the debug_regs_index enum - * in the various ASIC header files - */ - __u32 reg_idx; - /* Enable/disable */ - __u32 enable; - /* Context ID - Currently not in use */ - __u32 ctx_id; -}; - -/* - * Various information operations such as: - * - H/W IP information - * - Current dram usage - * - * The user calls this IOCTL with an opcode that describes the required - * information. The user should supply a pointer to a user-allocated memory - * chunk, which will be filled by the driver with the requested information. - * - * The user supplies the maximum amount of size to copy into the user's memory, - * in order to prevent data corruption in case of differences between the - * definitions of structures in kernel and userspace, e.g. in case of old - * userspace and new kernel driver - */ -#define HL_IOCTL_INFO \ - _IOWR('H', 0x01, struct hl_info_args) - -/* - * Command Buffer - * - Request a Command Buffer - * - Destroy a Command Buffer - * - * The command buffers are memory blocks that reside in DMA-able address - * space and are physically contiguous so they can be accessed by the device - * directly. They are allocated using the coherent DMA API. - * - * When creating a new CB, the IOCTL returns a handle of it, and the user-space - * process needs to use that handle to mmap the buffer so it can access them. - * - * In some instances, the device must access the command buffer through the - * device's MMU, and thus its memory should be mapped. In these cases, user can - * indicate the driver that such a mapping is required. - * The resulting device virtual address will be used internally by the driver, - * and won't be returned to user. - * - */ -#define HL_IOCTL_CB \ - _IOWR('H', 0x02, union hl_cb_args) - -/* - * Command Submission - * - * To submit work to the device, the user need to call this IOCTL with a set - * of JOBS. That set of JOBS constitutes a CS object. - * Each JOB will be enqueued on a specific queue, according to the user's input. - * There can be more then one JOB per queue. - * - * The CS IOCTL will receive two sets of JOBS. One set is for "restore" phase - * and a second set is for "execution" phase. - * The JOBS on the "restore" phase are enqueued only after context-switch - * (or if its the first CS for this context). The user can also order the - * driver to run the "restore" phase explicitly - * - * Goya/Gaudi: - * There are two types of queues - external and internal. External queues - * are DMA queues which transfer data from/to the Host. All other queues are - * internal. The driver will get completion notifications from the device only - * on JOBS which are enqueued in the external queues. - * - * Greco onwards: - * There is a single type of queue for all types of engines, either DMA engines - * for transfers from/to the host or inside the device, or compute engines. - * The driver will get completion notifications from the device for all queues. - * - * For jobs on external queues, the user needs to create command buffers - * through the CB ioctl and give the CB's handle to the CS ioctl. For jobs on - * internal queues, the user needs to prepare a "command buffer" with packets - * on either the device SRAM/DRAM or the host, and give the device address of - * that buffer to the CS ioctl. - * For jobs on H/W queues both options of command buffers are valid. - * - * This IOCTL is asynchronous in regard to the actual execution of the CS. This - * means it returns immediately after ALL the JOBS were enqueued on their - * relevant queues. Therefore, the user mustn't assume the CS has been completed - * or has even started to execute. - * - * Upon successful enqueue, the IOCTL returns a sequence number which the user - * can use with the "Wait for CS" IOCTL to check whether the handle's CS - * non-internal JOBS have been completed. Note that if the CS has internal JOBS - * which can execute AFTER the external JOBS have finished, the driver might - * report that the CS has finished executing BEFORE the internal JOBS have - * actually finished executing. - * - * Even though the sequence number increments per CS, the user can NOT - * automatically assume that if CS with sequence number N finished, then CS - * with sequence number N-1 also finished. The user can make this assumption if - * and only if CS N and CS N-1 are exactly the same (same CBs for the same - * queues). - */ -#define HL_IOCTL_CS \ - _IOWR('H', 0x03, union hl_cs_args) - -/* - * Wait for Command Submission - * - * The user can call this IOCTL with a handle it received from the CS IOCTL - * to wait until the handle's CS has finished executing. The user will wait - * inside the kernel until the CS has finished or until the user-requested - * timeout has expired. - * - * If the timeout value is 0, the driver won't sleep at all. It will check - * the status of the CS and return immediately - * - * The return value of the IOCTL is a standard Linux error code. The possible - * values are: - * - * EINTR - Kernel waiting has been interrupted, e.g. due to OS signal - * that the user process received - * ETIMEDOUT - The CS has caused a timeout on the device - * EIO - The CS was aborted (usually because the device was reset) - * ENODEV - The device wants to do hard-reset (so user need to close FD) - * - * The driver also returns a custom define in case the IOCTL call returned 0. - * The define can be one of the following: - * - * HL_WAIT_CS_STATUS_COMPLETED - The CS has been completed successfully (0) - * HL_WAIT_CS_STATUS_BUSY - The CS is still executing (0) - * HL_WAIT_CS_STATUS_TIMEDOUT - The CS has caused a timeout on the device - * (ETIMEDOUT) - * HL_WAIT_CS_STATUS_ABORTED - The CS was aborted, usually because the - * device was reset (EIO) - */ - -#define HL_IOCTL_WAIT_CS \ - _IOWR('H', 0x04, union hl_wait_cs_args) - -/* - * Memory - * - Map host memory to device MMU - * - Unmap host memory from device MMU - * - * This IOCTL allows the user to map host memory to the device MMU - * - * For host memory, the IOCTL doesn't allocate memory. The user is supposed - * to allocate the memory in user-space (malloc/new). The driver pins the - * physical pages (up to the allowed limit by the OS), assigns a virtual - * address in the device VA space and initializes the device MMU. - * - * There is an option for the user to specify the requested virtual address. - * - */ -#define HL_IOCTL_MEMORY \ - _IOWR('H', 0x05, union hl_mem_args) - -/* - * Debug - * - Enable/disable the ETR/ETF/FUNNEL/STM/BMON/SPMU debug traces - * - * This IOCTL allows the user to get debug traces from the chip. - * - * Before the user can send configuration requests of the various - * debug/profile engines, it needs to set the device into debug mode. - * This is because the debug/profile infrastructure is shared component in the - * device and we can't allow multiple users to access it at the same time. - * - * Once a user set the device into debug mode, the driver won't allow other - * users to "work" with the device, i.e. open a FD. If there are multiple users - * opened on the device, the driver won't allow any user to debug the device. - * - * For each configuration request, the user needs to provide the register index - * and essential data such as buffer address and size. - * - * Once the user has finished using the debug/profile engines, he should - * set the device into non-debug mode, i.e. disable debug mode. - * - * The driver can decide to "kick out" the user if he abuses this interface. - * - */ -#define HL_IOCTL_DEBUG \ - _IOWR('H', 0x06, struct hl_debug_args) - -#define HL_COMMAND_START 0x01 -#define HL_COMMAND_END 0x07 - -#endif /* HABANALABS_H_ */ -- cgit v1.3.1 From 20faaeec3794661c1d73578316691174a5a0e5a9 Mon Sep 17 00:00:00 2001 From: Ohad Sharabi Date: Sun, 18 Dec 2022 09:42:34 +0200 Subject: habanalabs: add uapi to flush inbound HBM transactions When doing p2p with a NIC device, the NIC needs to make sure all the writes to the HBM (through the PCI bar of the Gaudi device) were flushed. It can be done by either the NIC or the host reading through the PCI bar. To support the host side, we supply a simple uapi to perform this flush through the driver, because the user can't create such a transaction by itself (the PCI bar isn't exposed to normal users). Signed-off-by: Ohad Sharabi Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- .../accel/habanalabs/common/command_submission.c | 23 +++++++++++++++++++++- drivers/accel/habanalabs/common/habanalabs.h | 6 +++++- drivers/accel/habanalabs/gaudi/gaudi.c | 2 ++ drivers/accel/habanalabs/gaudi2/gaudi2.c | 2 ++ .../habanalabs/include/gaudi/asic_reg/gaudi_regs.h | 2 ++ include/uapi/drm/habanalabs_accel.h | 8 ++++++++ 6 files changed, 41 insertions(+), 2 deletions(-) (limited to 'include/uapi') diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index f6ee10334235..bb9584d72c32 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -13,7 +13,8 @@ #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ - HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ + HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) #define MAX_TS_ITER_NUM 10 @@ -1295,6 +1296,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) return CS_UNRESERVE_SIGNALS; else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) return CS_TYPE_ENGINE_CORE; + else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) + return CS_TYPE_FLUSH_PCI_HBW_WRITES; else return CS_TYPE_DEFAULT; } @@ -2443,6 +2446,21 @@ static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, return rc; } +static int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) +{ + struct hl_device *hdev = hpriv->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + + if (!prop->hbw_flush_reg) { + dev_dbg(hdev->dev, "HBW flush is not supported\n"); + return -EOPNOTSUPP; + } + + RREG32(prop->hbw_flush_reg); + + return 0; +} + int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) { union hl_cs_args *args = data; @@ -2499,6 +2517,9 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, args->in.num_engine_cores, args->in.core_command); break; + case CS_TYPE_FLUSH_PCI_HBW_WRITES: + rc = cs_ioctl_flush_pci_hbw_writes(hpriv); + break; default: rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, args->in.cs_flags, diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index 7b6f10033ee9..95bbc00c6262 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -375,7 +375,8 @@ enum hl_cs_type { CS_TYPE_COLLECTIVE_WAIT, CS_RESERVE_SIGNALS, CS_UNRESERVE_SIGNALS, - CS_TYPE_ENGINE_CORE + CS_TYPE_ENGINE_CORE, + CS_TYPE_FLUSH_PCI_HBW_WRITES, }; /* @@ -644,6 +645,8 @@ struct hl_hints_range { * (i.e. the DRAM supports multiple page sizes), otherwise * it will shall be equal to dram_page_size. * @num_engine_cores: number of engine cpu cores + * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is + * not supported. * @collective_first_sob: first sync object available for collective use * @collective_first_mon: first monitor available for collective use * @sync_stream_first_sob: first sync object available for sync stream use @@ -764,6 +767,7 @@ struct asic_fixed_properties { u32 xbar_edge_enabled_mask; u32 device_mem_alloc_default_page_size; u32 num_engine_cores; + u32 hbw_flush_reg; u16 collective_first_sob; u16 collective_first_mon; u16 sync_stream_first_sob; diff --git a/drivers/accel/habanalabs/gaudi/gaudi.c b/drivers/accel/habanalabs/gaudi/gaudi.c index 1b701a87c6fe..13f9e6c0cd90 100644 --- a/drivers/accel/habanalabs/gaudi/gaudi.c +++ b/drivers/accel/habanalabs/gaudi/gaudi.c @@ -701,6 +701,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev) prop->dma_mask = 48; + prop->hbw_flush_reg = mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL; + return 0; } diff --git a/drivers/accel/habanalabs/gaudi2/gaudi2.c b/drivers/accel/habanalabs/gaudi2/gaudi2.c index 7df1a68dd403..4529a64d49b6 100644 --- a/drivers/accel/habanalabs/gaudi2/gaudi2.c +++ b/drivers/accel/habanalabs/gaudi2/gaudi2.c @@ -2071,6 +2071,8 @@ static int gaudi2_set_fixed_properties(struct hl_device *hdev) prop->dma_mask = 64; + prop->hbw_flush_reg = mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0; + return 0; } diff --git a/drivers/accel/habanalabs/include/gaudi/asic_reg/gaudi_regs.h b/drivers/accel/habanalabs/include/gaudi/asic_reg/gaudi_regs.h index 1a6576666794..23ee8691db46 100644 --- a/drivers/accel/habanalabs/include/gaudi/asic_reg/gaudi_regs.h +++ b/drivers/accel/habanalabs/include/gaudi/asic_reg/gaudi_regs.h @@ -320,4 +320,6 @@ #define mmPSOC_TPC_PLL_NR 0xC73100 #define mmIF_W_PLL_NR 0x488100 +#define mmPCIE_WRAP_RR_ELBI_RD_SEC_REG_CTRL 0xC01208 + #endif /* ASIC_REG_GAUDI_REGS_H_ */ diff --git a/include/uapi/drm/habanalabs_accel.h b/include/uapi/drm/habanalabs_accel.h index 90e628779264..331567ec9e79 100644 --- a/include/uapi/drm/habanalabs_accel.h +++ b/include/uapi/drm/habanalabs_accel.h @@ -1478,6 +1478,14 @@ struct hl_cs_chunk { */ #define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 +/* + * The flush HBW PCI writes is merged into the existing CS ioctls. + * Used to flush all HBW PCI writes. + * This is a blocking operation and for this reason the user shall not use + * the return sequence number (which will be invalid anyway) + */ +#define HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES 0x8000 + #define HL_CS_STATUS_SUCCESS 0 #define HL_MAX_JOBS_PER_CS 512 -- cgit v1.3.1