diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2023-05-22 16:35:02 -0400 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2023-05-22 16:35:02 -0400 |
commit | 7907ad748bdba8ac9ca47f0a650cc2e5d2ad6e24 (patch) | |
tree | 068ffd5248c8c988015fc751fe8b68dd51347943 /include | |
parent | 16853cd8f6d44d774f683d670be38c7d91eb32b8 (diff) | |
parent | 394f811848827ad23d2b43e94e5d72a24cfbc39f (diff) |
Merge patch series "Use block pr_ops in LIO"
Mike Christie <michael.christie@oracle.com> says:
The patches in this thread allow us to use the block pr_ops with LIO's
target_core_iblock module to support cluster applications in VMs. They
were built over Linus's tree. They also apply over linux-next and
Martin's tree and Jens's trees.
Currently, to use windows clustering or linux clustering (pacemaker +
cluster labs scsi fence agents) in VMs with LIO and vhost-scsi, you
have to use tcmu or pscsi or use a cluster aware FS/framework for the
LIO pr file. Setting up a cluster FS/framework is pain and waste when
your real backend device is already a distributed device, and pscsi
and tcmu are nice for specific use cases, but iblock gives you the
best performance and allows you to use stacked devices like
dm-multipath. So these patches allow iblock to work like pscsi/tcmu
where they can pass a PR command to the backend module. And then
iblock will use the pr_ops to pass the PR command to the real devices
similar to what we do for unmap today.
The patches are separated in the following groups:
Patch 1 - 2:
- Add block layer callouts for reading reservations and rename reservation
error code.
Patch 3 - 5:
- SCSI support for new callouts.
Patch 6:
- DM support for new callouts.
Patch 7 - 13:
- NVMe support for new callouts.
Patch 14 - 18:
- LIO support for new callouts.
This patchset has been tested with the libiscsi PGR ops and with
window's failover cluster verification test. Note that for scsi
backend devices we need this patchset:
https://lore.kernel.org/linux-scsi/20230123221046.125483-1-michael.christie@oracle.com/T/#m4834a643ffb5bac2529d65d40906d3cfbdd9b1b7
to handle UAs. To reduce the size of this patchset that's being done
separately to make reviewing easier. And to make merging easier this
patchset and the one above do not have any conflicts so can be merged
in different trees.
Link: https://lore.kernel.org/r/20230407200551.12660-1-michael.christie@oracle.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/blk_types.h | 4 | ||||
-rw-r--r-- | include/linux/nvme.h | 51 | ||||
-rw-r--r-- | include/linux/pr.h | 25 | ||||
-rw-r--r-- | include/scsi/scsi_common.h | 13 | ||||
-rw-r--r-- | include/scsi/scsi_proto.h | 5 | ||||
-rw-r--r-- | include/target/target_core_backend.h | 8 | ||||
-rw-r--r-- | include/target/target_core_base.h | 3 |
7 files changed, 96 insertions, 13 deletions
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 740afe80f297..936e898016f8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -101,7 +101,7 @@ typedef u16 blk_short_t; #define BLK_STS_NOSPC ((__force blk_status_t)3) #define BLK_STS_TRANSPORT ((__force blk_status_t)4) #define BLK_STS_TARGET ((__force blk_status_t)5) -#define BLK_STS_NEXUS ((__force blk_status_t)6) +#define BLK_STS_RESV_CONFLICT ((__force blk_status_t)6) #define BLK_STS_MEDIUM ((__force blk_status_t)7) #define BLK_STS_PROTECTION ((__force blk_status_t)8) #define BLK_STS_RESOURCE ((__force blk_status_t)9) @@ -189,7 +189,7 @@ static inline bool blk_path_error(blk_status_t error) case BLK_STS_NOTSUPP: case BLK_STS_NOSPC: case BLK_STS_TARGET: - case BLK_STS_NEXUS: + case BLK_STS_RESV_CONFLICT: case BLK_STS_MEDIUM: case BLK_STS_PROTECTION: return false; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 779507ac750b..182b6d614eb1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -759,20 +759,55 @@ enum { NVME_LBART_ATTRIB_HIDE = 1 << 1, }; +enum nvme_pr_type { + NVME_PR_WRITE_EXCLUSIVE = 1, + NVME_PR_EXCLUSIVE_ACCESS = 2, + NVME_PR_WRITE_EXCLUSIVE_REG_ONLY = 3, + NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY = 4, + NVME_PR_WRITE_EXCLUSIVE_ALL_REGS = 5, + NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS = 6, +}; + +enum nvme_eds { + NVME_EXTENDED_DATA_STRUCT = 0x1, +}; + +struct nvme_registered_ctrl { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 hostid; + __le64 rkey; +}; + struct nvme_reservation_status { __le32 gen; __u8 rtype; __u8 regctl[2]; __u8 resv5[2]; __u8 ptpls; - __u8 resv10[13]; - struct { - __le16 cntlid; - __u8 rcsts; - __u8 resv3[5]; - __le64 hostid; - __le64 rkey; - } regctl_ds[]; + __u8 resv10[14]; + struct nvme_registered_ctrl regctl_ds[]; +}; + +struct nvme_registered_ctrl_ext { + __le16 cntlid; + __u8 rcsts; + __u8 rsvd3[5]; + __le64 rkey; + __u8 hostid[16]; + __u8 rsvd32[32]; +}; + +struct nvme_reservation_status_ext { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[14]; + __u8 rsvd24[40]; + struct nvme_registered_ctrl_ext regctl_eds[]; }; enum nvme_async_event_type { diff --git a/include/linux/pr.h b/include/linux/pr.h index 94ceec713afe..3003daec28a5 100644 --- a/include/linux/pr.h +++ b/include/linux/pr.h @@ -4,6 +4,18 @@ #include <uapi/linux/pr.h> +struct pr_keys { + u32 generation; + u32 num_keys; + u64 keys[]; +}; + +struct pr_held_reservation { + u64 key; + u32 generation; + enum pr_type type; +}; + struct pr_ops { int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, u32 flags); @@ -14,6 +26,19 @@ struct pr_ops { int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, enum pr_type type, bool abort); int (*pr_clear)(struct block_device *bdev, u64 key); + /* + * pr_read_keys - Read the registered keys and return them in the + * pr_keys->keys array. The keys array will have been allocated at the + * end of the pr_keys struct, and pr_keys->num_keys must be set to the + * number of keys the array can hold. If there are more than can fit + * in the array, success will still be returned and pr_keys->num_keys + * will reflect the total number of keys the device contains, so the + * caller can retry with a larger array. + */ + int (*pr_read_keys)(struct block_device *bdev, + struct pr_keys *keys_info); + int (*pr_read_reservation)(struct block_device *bdev, + struct pr_held_reservation *rsv); }; #endif /* LINUX_PR_H */ diff --git a/include/scsi/scsi_common.h b/include/scsi/scsi_common.h index 5b567b43e1b1..fb58715fac86 100644 --- a/include/scsi/scsi_common.h +++ b/include/scsi/scsi_common.h @@ -7,8 +7,21 @@ #define _SCSI_COMMON_H_ #include <linux/types.h> +#include <uapi/linux/pr.h> #include <scsi/scsi_proto.h> +enum scsi_pr_type { + SCSI_PR_WRITE_EXCLUSIVE = 0x01, + SCSI_PR_EXCLUSIVE_ACCESS = 0x03, + SCSI_PR_WRITE_EXCLUSIVE_REG_ONLY = 0x05, + SCSI_PR_EXCLUSIVE_ACCESS_REG_ONLY = 0x06, + SCSI_PR_WRITE_EXCLUSIVE_ALL_REGS = 0x07, + SCSI_PR_EXCLUSIVE_ACCESS_ALL_REGS = 0x08, +}; + +enum scsi_pr_type block_pr_type_to_scsi(enum pr_type type); +enum pr_type scsi_pr_type_to_block(enum scsi_pr_type type); + static inline unsigned scsi_varlen_cdb_length(const void *hdr) { diff --git a/include/scsi/scsi_proto.h b/include/scsi/scsi_proto.h index fbe5bdfe4d6e..07d65c1f59db 100644 --- a/include/scsi/scsi_proto.h +++ b/include/scsi/scsi_proto.h @@ -151,6 +151,11 @@ #define ZO_FINISH_ZONE 0x02 #define ZO_OPEN_ZONE 0x03 #define ZO_RESET_WRITE_POINTER 0x04 +/* values for PR in service action */ +#define READ_KEYS 0x00 +#define READ_RESERVATION 0x01 +#define REPORT_CAPABILITES 0x02 +#define READ_FULL_STATUS 0x03 /* values for variable length command */ #define XDREAD_32 0x03 #define XDWRITE_32 0x04 diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index a3c193df25b3..739df993aa5e 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -62,13 +62,17 @@ struct target_backend_ops { struct configfs_attribute **tb_dev_action_attrs; }; -struct sbc_ops { +struct exec_cmd_ops { sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *, u32, enum dma_data_direction); sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd); sense_reason_t (*execute_write_same)(struct se_cmd *cmd); sense_reason_t (*execute_unmap)(struct se_cmd *cmd, sector_t lba, sector_t nolb); + sense_reason_t (*execute_pr_out)(struct se_cmd *cmd, u8 sa, u64 key, + u64 sa_key, u8 type, bool aptpl); + sense_reason_t (*execute_pr_in)(struct se_cmd *cmd, u8 sa, + unsigned char *param_data); }; int transport_backend_register(const struct target_backend_ops *); @@ -86,7 +90,7 @@ sense_reason_t spc_emulate_report_luns(struct se_cmd *cmd); sense_reason_t spc_emulate_inquiry_std(struct se_cmd *, unsigned char *); sense_reason_t spc_emulate_evpd_83(struct se_cmd *, unsigned char *); -sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops); +sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct exec_cmd_ops *ops); u32 sbc_get_device_rev(struct se_device *dev); u32 sbc_get_device_type(struct se_device *dev); sector_t sbc_get_write_same_sectors(struct se_cmd *cmd); diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 5f8e96f1516f..159567359bbb 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -880,7 +880,8 @@ struct target_opcode_descriptor { u8 specific_timeout; u16 nominal_timeout; u16 recommended_timeout; - bool (*enabled)(struct se_cmd *cmd); + bool (*enabled)(struct target_opcode_descriptor *descr, + struct se_cmd *cmd); void (*update_usage_bits)(u8 *usage_bits, struct se_device *dev); u8 usage_bits[]; |