summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:37:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-02 15:37:03 -0700
commitbce159d734091fe31340976081577333f52a85e4 (patch)
tree8396be51e6703797a60aefb4992e729f327d27c2 /drivers
parent750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff)
parent0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff)
Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe: "On top of the core changes, here are the block driver changes for this merge window: - NVMe changes: - NVMe over Fibre Channel protocol updates, which also reach over to drivers/scsi/lpfc (James Smart) - namespace revalidation support on the target (Anthony Iliopoulos) - gcc zero length array fix (Arnd Bergmann) - nvmet cleanups (Chaitanya Kulkarni) - misc cleanups and fixes (me, Keith Busch, Sagi Grimberg) - use a SRQ per completion vector (Max Gurtovoy) - fix handling of runtime changes to the queue count (Weiping Zhang) - t10 protection information support for nvme-rdma and nvmet-rdma (Israel Rukshin and Max Gurtovoy) - target side AEN improvements (Chaitanya Kulkarni) - various fixes and minor improvements all over, icluding the nvme part of the lpfc driver" - Floppy code cleanup series (Willy, Denis) - Floppy contention fix (Jiri) - Loop CONFIGURE support (Martijn) - bcache fixes/improvements (Coly, Joe, Colin) - q->queuedata cleanups (Christoph) - Get rid of ioctl_by_bdev (Christoph, Stefan) - md/raid5 allocation fixes (Coly) - zero length array fixes (Gustavo) - swim3 task state fix (Xu)" * tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits) bcache: configure the asynchronous registertion to be experimental bcache: asynchronous devices registration bcache: fix refcount underflow in bcache_device_free() bcache: Convert pr_<level> uses to a more typical style bcache: remove redundant variables i and n lpfc: Fix return value in __lpfc_nvme_ls_abort lpfc: fix axchg pointer reference after free and double frees lpfc: Fix pointer checks and comments in LS receive refactoring nvme: set dma alignment to qword nvmet: cleanups the loop in nvmet_async_events_process nvmet: fix memory leak when removing namespaces and controllers concurrently nvmet-rdma: add metadata/T10-PI support nvmet: add metadata support for block devices nvmet: add metadata/T10-PI support nvme: add Metadata Capabilities enumerations nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len nvmet: rename nvmet_rw_len to nvmet_rw_data_len nvmet: add metadata characteristics for a namespace nvme-rdma: add metadata/T10-PI support nvme-rdma: introduce nvme_rdma_sgl structure ...
Diffstat (limited to 'drivers')
-rw-r--r--drivers/block/floppy.c466
-rw-r--r--drivers/block/loop.c383
-rw-r--r--drivers/block/swim.c6
-rw-r--r--drivers/md/bcache/Kconfig9
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/bset.c6
-rw-r--r--drivers/md/bcache/btree.c16
-rw-r--r--drivers/md/bcache/extents.c12
-rw-r--r--drivers/md/bcache/io.c8
-rw-r--r--drivers/md/bcache/journal.c34
-rw-r--r--drivers/md/bcache/request.c6
-rw-r--r--drivers/md/bcache/super.c232
-rw-r--r--drivers/md/bcache/sysfs.c8
-rw-r--r--drivers/md/bcache/writeback.c6
-rw-r--r--drivers/md/md-linear.h2
-rw-r--r--drivers/md/md.c71
-rw-r--r--drivers/md/md.h1
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid1.h2
-rw-r--r--drivers/md/raid10.h2
-rw-r--r--drivers/md/raid5.c22
-rw-r--r--drivers/nvme/host/core.c322
-rw-r--r--drivers/nvme/host/fc.c577
-rw-r--r--drivers/nvme/host/fc.h227
-rw-r--r--drivers/nvme/host/lightnvm.c7
-rw-r--r--drivers/nvme/host/multipath.c16
-rw-r--r--drivers/nvme/host/nvme.h28
-rw-r--r--drivers/nvme/host/pci.c117
-rw-r--r--drivers/nvme/host/rdma.c321
-rw-r--r--drivers/nvme/host/tcp.c64
-rw-r--r--drivers/nvme/target/Kconfig1
-rw-r--r--drivers/nvme/target/admin-cmd.c42
-rw-r--r--drivers/nvme/target/configfs.c272
-rw-r--r--drivers/nvme/target/core.c166
-rw-r--r--drivers/nvme/target/discovery.c8
-rw-r--r--drivers/nvme/target/fabrics-cmd.c15
-rw-r--r--drivers/nvme/target/fc.c805
-rw-r--r--drivers/nvme/target/fcloop.c155
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c118
-rw-r--r--drivers/nvme/target/io-cmd-file.c23
-rw-r--r--drivers/nvme/target/nvmet.h36
-rw-r--r--drivers/nvme/target/rdma.c416
-rw-r--r--drivers/nvme/target/tcp.c53
-rw-r--r--drivers/nvme/target/trace.h28
-rw-r--r--drivers/s390/block/dasd_ioctl.c76
-rw-r--r--drivers/scsi/lpfc/lpfc.h2
-rw-r--r--drivers/scsi/lpfc/lpfc_attr.c3
-rw-r--r--drivers/scsi/lpfc/lpfc_crtn.h9
-rw-r--r--drivers/scsi/lpfc/lpfc_ct.c1
-rw-r--r--drivers/scsi/lpfc/lpfc_debugfs.c5
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c8
-rw-r--r--drivers/scsi/lpfc/lpfc_init.c7
-rw-r--r--drivers/scsi/lpfc/lpfc_mem.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_nportdisc.c13
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.c491
-rw-r--r--drivers/scsi/lpfc/lpfc_nvme.h180
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.c833
-rw-r--r--drivers/scsi/lpfc/lpfc_nvmet.h158
-rw-r--r--drivers/scsi/lpfc/lpfc_sli.c128
59 files changed, 4931 insertions, 2111 deletions
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index c3daa64cb52c..3e9db22db2a8 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -337,8 +337,7 @@ static bool initialized;
/*
* globals used by 'result()'
*/
-#define MAX_REPLIES 16
-static unsigned char reply_buffer[MAX_REPLIES];
+static unsigned char reply_buffer[FD_RAW_REPLY_SIZE];
static int inr; /* size of reply buffer, when called from interrupt */
#define ST0 0
#define ST1 1
@@ -595,12 +594,12 @@ static unsigned char in_sector_offset; /* offset within physical sector,
static inline unsigned char fdc_inb(int fdc, int reg)
{
- return fd_inb(fdc_state[fdc].address + reg);
+ return fd_inb(fdc_state[fdc].address, reg);
}
static inline void fdc_outb(unsigned char value, int fdc, int reg)
{
- fd_outb(value, fdc_state[fdc].address + reg);
+ fd_outb(value, fdc_state[fdc].address, reg);
}
static inline bool drive_no_geom(int drive)
@@ -668,16 +667,12 @@ static struct output_log {
static int output_log_pos;
-#define current_reqD -1
#define MAXTIMEOUT -2
static void __reschedule_timeout(int drive, const char *message)
{
unsigned long delay;
- if (drive == current_reqD)
- drive = current_drive;
-
if (drive < 0 || drive >= N_DRIVE) {
delay = 20UL * HZ;
drive = 0;
@@ -827,59 +822,70 @@ static int set_dor(int fdc, char mask, char data)
return olddor;
}
-static void twaddle(void)
+static void twaddle(int fdc, int drive)
{
- if (drive_params[current_drive].select_delay)
+ if (drive_params[drive].select_delay)
return;
- fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)),
- current_fdc, FD_DOR);
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
- drive_state[current_drive].select_date = jiffies;
+ fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(drive)),
+ fdc, FD_DOR);
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
+ drive_state[drive].select_date = jiffies;
}
/*
- * Reset all driver information about the current fdc.
+ * Reset all driver information about the specified fdc.
* This is needed after a reset, and after a raw command.
*/
-static void reset_fdc_info(int mode)
+static void reset_fdc_info(int fdc, int mode)
{
int drive;
- fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1;
- fdc_state[current_fdc].need_configure = 1;
- fdc_state[current_fdc].perp_mode = 1;
- fdc_state[current_fdc].rawcmd = 0;
+ fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1;
+ fdc_state[fdc].need_configure = 1;
+ fdc_state[fdc].perp_mode = 1;
+ fdc_state[fdc].rawcmd = 0;
for (drive = 0; drive < N_DRIVE; drive++)
- if (FDC(drive) == current_fdc &&
+ if (FDC(drive) == fdc &&
(mode || drive_state[drive].track != NEED_1_RECAL))
drive_state[drive].track = NEED_2_RECAL;
}
-/* selects the fdc and drive, and enables the fdc's input/dma. */
+/*
+ * selects the fdc and drive, and enables the fdc's input/dma.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static void set_fdc(int drive)
{
- unsigned int new_fdc = current_fdc;
+ unsigned int fdc;
- if (drive >= 0 && drive < N_DRIVE) {
- new_fdc = FDC(drive);
- current_drive = drive;
+ if (drive < 0 || drive >= N_DRIVE) {
+ pr_info("bad drive value %d\n", drive);
+ return;
}
- if (new_fdc >= N_FDC) {
+
+ fdc = FDC(drive);
+ if (fdc >= N_FDC) {
pr_info("bad fdc value\n");
return;
}
- current_fdc = new_fdc;
- set_dor(current_fdc, ~0, 8);
+
+ set_dor(fdc, ~0, 8);
#if N_FDC > 1
- set_dor(1 - current_fdc, ~8, 0);
+ set_dor(1 - fdc, ~8, 0);
#endif
- if (fdc_state[current_fdc].rawcmd == 2)
- reset_fdc_info(1);
- if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY)
- fdc_state[current_fdc].reset = 1;
+ if (fdc_state[fdc].rawcmd == 2)
+ reset_fdc_info(fdc, 1);
+ if (fdc_inb(fdc, FD_STATUS) != STATUS_READY)
+ fdc_state[fdc].reset = 1;
+
+ current_drive = drive;
+ current_fdc = fdc;
}
-/* locks the driver */
+/*
+ * locks the driver.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static int lock_fdc(int drive)
{
if (WARN(atomic_read(&usage_count) == 0,
@@ -1062,12 +1068,9 @@ static void setup_DMA(void)
unsigned long f;
if (raw_cmd->length == 0) {
- int i;
-
- pr_info("zero dma transfer size:");
- for (i = 0; i < raw_cmd->cmd_count; i++)
- pr_cont("%x,", raw_cmd->cmd[i]);
- pr_cont("\n");
+ print_hex_dump(KERN_INFO, "zero dma transfer size: ",
+ DUMP_PREFIX_NONE, 16, 1,
+ raw_cmd->fullcmd, raw_cmd->cmd_count, false);
cont->done(0);
fdc_state[current_fdc].reset = 1;
return;
@@ -1104,62 +1107,62 @@ static void setup_DMA(void)
#endif
}
-static void show_floppy(void);
+static void show_floppy(int fdc);
/* waits until the fdc becomes ready */
-static int wait_til_ready(void)
+static int wait_til_ready(int fdc)
{
int status;
int counter;
- if (fdc_state[current_fdc].reset)
+ if (fdc_state[fdc].reset)
return -1;
for (counter = 0; counter < 10000; counter++) {
- status = fdc_inb(current_fdc, FD_STATUS);
+ status = fdc_inb(fdc, FD_STATUS);
if (status & STATUS_READY)
return status;
}
if (initialized) {
- DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc);
- show_floppy();
+ DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc);
+ show_floppy(fdc);
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
return -1;
}
/* sends a command byte to the fdc */
-static int output_byte(char byte)
+static int output_byte(int fdc, char byte)
{
- int status = wait_til_ready();
+ int status = wait_til_ready(fdc);
if (status < 0)
return -1;
if (is_ready_state(status)) {
- fdc_outb(byte, current_fdc, FD_DATA);
+ fdc_outb(byte, fdc, FD_DATA);
output_log[output_log_pos].data = byte;
output_log[output_log_pos].status = status;
output_log[output_log_pos].jiffies = jiffies;
output_log_pos = (output_log_pos + 1) % OLOGSIZE;
return 0;
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
if (initialized) {
DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n",
- byte, current_fdc, status);
- show_floppy();
+ byte, fdc, status);
+ show_floppy(fdc);
}
return -1;
}
/* gets the response from the fdc */
-static int result(void)
+static int result(int fdc)
{
int i;
int status = 0;
- for (i = 0; i < MAX_REPLIES; i++) {
- status = wait_til_ready();
+ for (i = 0; i < FD_RAW_REPLY_SIZE; i++) {
+ status = wait_til_ready(fdc);
if (status < 0)
break;
status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA;
@@ -1169,24 +1172,24 @@ static int result(void)
return i;
}
if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY))
- reply_buffer[i] = fdc_inb(current_fdc, FD_DATA);
+ reply_buffer[i] = fdc_inb(fdc, FD_DATA);
else
break;
}
if (initialized) {
DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n",
- current_fdc, status, i);
- show_floppy();
+ fdc, status, i);
+ show_floppy(fdc);
}
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
return -1;
}
#define MORE_OUTPUT -2
/* does the fdc need more output? */
-static int need_more_output(void)
+static int need_more_output(int fdc)
{
- int status = wait_til_ready();
+ int status = wait_til_ready(fdc);
if (status < 0)
return -1;
@@ -1194,13 +1197,13 @@ static int need_more_output(void)
if (is_ready_state(status))
return MORE_OUTPUT;
- return result();
+ return result(fdc);
}
/* Set perpendicular mode as required, based on data rate, if supported.
* 82077 Now tested. 1Mbps data rate only possible with 82077-1.
*/
-static void perpendicular_mode(void)
+static void perpendicular_mode(int fdc)
{
unsigned char perp_mode;
@@ -1215,7 +1218,7 @@ static void perpendicular_mode(void)
default:
DPRINT("Invalid data rate for perpendicular mode!\n");
cont->done(0);
- fdc_state[current_fdc].reset = 1;
+ fdc_state[fdc].reset = 1;
/*
* convenient way to return to
* redo without too much hassle
@@ -1226,12 +1229,12 @@ static void perpendicular_mode(void)
} else
perp_mode = 0;
- if (fdc_state[current_fdc].perp_mode == perp_mode)
+ if (fdc_state[fdc].perp_mode == perp_mode)
return;
- if (fdc_state[current_fdc].version >= FDC_82077_ORIG) {
- output_byte(FD_PERPENDICULAR);
- output_byte(perp_mode);
- fdc_state[current_fdc].perp_mode = perp_mode;
+ if (fdc_state[fdc].version >= FDC_82077_ORIG) {
+ output_byte(fdc, FD_PERPENDICULAR);
+ output_byte(fdc, perp_mode);
+ fdc_state[fdc].perp_mode = perp_mode;
} else if (perp_mode) {
DPRINT("perpendicular mode not supported by this FDC.\n");
}
@@ -1240,16 +1243,15 @@ static void perpendicular_mode(void)
static int fifo_depth = 0xa;
static int no_fifo;
-static int fdc_configure(void)
+static int fdc_configure(int fdc)
{
/* Turn on FIFO */
- output_byte(FD_CONFIGURE);
- if (need_more_output() != MORE_OUTPUT)
+ output_byte(fdc, FD_CONFIGURE);
+ if (need_more_output(fdc) != MORE_OUTPUT)
return 0;
- output_byte(0);
- output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf));
- output_byte(0); /* pre-compensation from track
- 0 upwards */
+ output_byte(fdc, 0);
+ output_byte(fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf));
+ output_byte(fdc, 0); /* pre-compensation from track 0 upwards */
return 1;
}
@@ -1274,7 +1276,7 @@ static int fdc_configure(void)
*
* These values are rounded up to the next highest available delay time.
*/
-static void fdc_specify(void)
+static void fdc_specify(int fdc, int drive)
{
unsigned char spec1;
unsigned char spec2;
@@ -1286,10 +1288,10 @@ static void fdc_specify(void)
int hlt_max_code = 0x7f;
int hut_max_code = 0xf;
- if (fdc_state[current_fdc].need_configure &&
- fdc_state[current_fdc].version >= FDC_82072A) {
- fdc_configure();
- fdc_state[current_fdc].need_configure = 0;
+ if (fdc_state[fdc].need_configure &&
+ fdc_state[fdc].version >= FDC_82072A) {
+ fdc_configure(fdc);
+ fdc_state[fdc].need_configure = 0;
}
switch (raw_cmd->rate & 0x03) {
@@ -1298,13 +1300,13 @@ static void fdc_specify(void)
break;
case 1:
dtr = 300;
- if (fdc_state[current_fdc].version >= FDC_82078) {
+ if (fdc_state[fdc].version >= FDC_82078) {
/* chose the default rate table, not the one
* where 1 = 2 Mbps */
- output_byte(FD_DRIVESPEC);
- if (need_more_output() == MORE_OUTPUT) {
- output_byte(UNIT(current_drive));
- output_byte(0xc0);
+ output_byte(fdc, FD_DRIVESPEC);
+ if (need_more_output(fdc) == MORE_OUTPUT) {
+ output_byte(fdc, UNIT(drive));
+ output_byte(fdc, 0xc0);
}
}
break;
@@ -1313,14 +1315,14 @@ static void fdc_specify(void)
break;
}
- if (fdc_state[current_fdc].version >= FDC_82072) {
+ if (fdc_state[fdc].version >= FDC_82072) {
scale_dtr = dtr;
hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */
hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */
}
/* Convert step rate from microseconds to milliseconds and 4 bits */
- srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000,
+ srt = 16 - DIV_ROUND_UP(drive_params[drive].srt * scale_dtr / 1000,
NOMINAL_DTR);
if (slow_floppy)
srt = srt / 4;
@@ -1328,14 +1330,14 @@ static void fdc_specify(void)
SUPBOUND(srt, 0xf);
INFBOUND(srt, 0);
- hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2,
+ hlt = DIV_ROUND_UP(drive_params[drive].hlt * scale_dtr / 2,
NOMINAL_DTR);
if (hlt < 0x01)
hlt = 0x01;
else if (hlt > 0x7f)
hlt = hlt_max_code;
- hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16,
+ hut = DIV_ROUND_UP(drive_params[drive].hut * scale_dtr / 16,
NOMINAL_DTR);
if (hut < 0x1)
hut = 0x1;
@@ -1346,12 +1348,12 @@ static void fdc_specify(void)
spec2 = (hlt << 1) | (use_virtual_dma & 1);
/* If these parameters did not change, just return with success */
- if (fdc_state[current_fdc].spec1 != spec1 ||
- fdc_state[current_fdc].spec2 != spec2) {
+ if (fdc_state[fdc].spec1 != spec1 ||
+ fdc_state[fdc].spec2 != spec2) {
/* Go ahead and set spec1 and spec2 */
- output_byte(FD_SPECIFY);
- output_byte(fdc_state[current_fdc].spec1 = spec1);
- output_byte(fdc_state[current_fdc].spec2 = spec2);
+ output_byte(fdc, FD_SPECIFY);
+ output_byte(fdc, fdc_state[fdc].spec1 = spec1);
+ output_byte(fdc, fdc_state[fdc].spec2 = spec2);
}
} /* fdc_specify */
@@ -1513,7 +1515,7 @@ static void setup_rw_floppy(void)
r = 0;
for (i = 0; i < raw_cmd->cmd_count; i++)
- r |= output_byte(raw_cmd->cmd[i]);
+ r |= output_byte(current_fdc, raw_cmd->fullcmd[i]);
debugt(__func__, "rw_command");
@@ -1524,7 +1526,7 @@ static void setup_rw_floppy(void)
}
if (!(flags & FD_RAW_INTR)) {
- inr = result();
+ inr = result(current_fdc);
cont->interrupt();
} else if (flags & FD_RAW_NEED_DISK)
fd_watchdog();
@@ -1562,29 +1564,29 @@ static void seek_interrupt(void)
floppy_ready();
}
-static void check_wp(void)
+static void check_wp(int fdc, int drive)
{
- if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) {
+ if (test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) {
/* check write protection */
- output_byte(FD_GETSTATUS);
- output_byte(UNIT(current_drive));
- if (result() != 1) {
- fdc_state[current_fdc].reset = 1;
+ output_byte(fdc, FD_GETSTATUS);
+ output_byte(fdc, UNIT(drive));
+ if (result(fdc) != 1) {
+ fdc_state[fdc].reset = 1;
return;
}
- clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags);
+ clear_bit(FD_VERIFY_BIT, &drive_state[drive].flags);
clear_bit(FD_NEED_TWADDLE_BIT,
- &drive_state[current_drive].flags);
- debug_dcl(drive_params[current_drive].flags,
+ &drive_state[drive].flags);
+ debug_dcl(drive_params[drive].flags,
"checking whether disk is write protected\n");
- debug_dcl(drive_params[current_drive].flags, "wp=%x\n",
+ debug_dcl(drive_params[drive].flags, "wp=%x\n",
reply_buffer[ST3] & 0x40);
if (!(reply_buffer[ST3] & 0x40))
set_bit(FD_DISK_WRITABLE_BIT,
- &drive_state[current_drive].flags);
+ &drive_state[drive].flags);
else
clear_bit(FD_DISK_WRITABLE_BIT,
- &drive_state[current_drive].flags);
+ &drive_state[drive].flags);
}
}
@@ -1628,7 +1630,7 @@ static void seek_floppy(void)
track = 1;
}
} else {
- check_wp();
+ check_wp(current_fdc, current_drive);
if (raw_cmd->track != drive_state[current_drive].track &&
(raw_cmd->flags & FD_RAW_NEED_SEEK))
track = raw_cmd->track;
@@ -1639,9 +1641,9 @@ static void seek_floppy(void)
}
do_floppy = seek_interrupt;
- output_byte(FD_SEEK);
- output_byte(UNIT(current_drive));
- if (output_byte(track) < 0) {
+ output_byte(current_fdc, FD_SEEK);
+ output_byte(current_fdc, UNIT(current_drive));
+ if (output_byte(current_fdc, track) < 0) {
reset_fdc();
return;
}
@@ -1742,14 +1744,14 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id)
do_print = !handler && print_unex && initialized;
- inr = result();
+ inr = result(current_fdc);
if (do_print)
print_result("unexpected interrupt", inr);
if (inr == 0) {
int max_sensei = 4;
do {
- output_byte(FD_SENSEI);
- inr = result();
+ output_byte(current_fdc, FD_SENSEI);
+ inr = result(current_fdc);
if (do_print)
print_result("sensei", inr);
max_sensei--;
@@ -1771,8 +1773,8 @@ static void recalibrate_floppy(void)
{
debugt(__func__, "");
do_floppy = recal_interrupt;
- output_byte(FD_RECALIBRATE);
- if (output_byte(UNIT(current_drive)) < 0)
+ output_byte(current_fdc, FD_RECALIBRATE);
+ if (output_byte(current_fdc, UNIT(current_drive)) < 0)
reset_fdc();
}
@@ -1782,7 +1784,7 @@ static void recalibrate_floppy(void)
static void reset_interrupt(void)
{
debugt(__func__, "");
- result(); /* get the status ready for set_fdc */
+ result(current_fdc); /* get the status ready for set_fdc */
if (fdc_state[current_fdc].reset) {
pr_info("reset set in interrupt, calling %ps\n", cont->error);
cont->error(); /* a reset just after a reset. BAD! */
@@ -1792,7 +1794,9 @@ static void reset_interrupt(void)
/*
* reset is done by pulling bit 2 of DOR low for a while (old FDCs),
- * or by setting the self clearing bit 7 of STATUS (newer FDCs)
+ * or by setting the self clearing bit 7 of STATUS (newer FDCs).
+ * This WILL trigger an interrupt, causing the handlers in the current
+ * cont's ->redo() to be called via reset_interrupt().
*/
static void reset_fdc(void)
{
@@ -1800,7 +1804,7 @@ static void reset_fdc(void)
do_floppy = reset_interrupt;
fdc_state[current_fdc].reset = 0;
- reset_fdc_info(0);
+ reset_fdc_info(current_fdc, 0);
/* Pseudo-DMA may intercept 'reset finished' interrupt. */
/* Irrelevant for systems with true DMA (i386). */
@@ -1819,7 +1823,7 @@ static void reset_fdc(void)
}
}
-static void show_floppy(void)
+static void show_floppy(int fdc)
{
int i;
@@ -1842,7 +1846,7 @@ static void show_floppy(void)
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
reply_buffer, resultsize, true);
- pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS));
+ pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS));
pr_info("fdc_busy=%lu\n", fdc_busy);
if (do_floppy)
pr_info("do_floppy=%ps\n", do_floppy);
@@ -1868,7 +1872,7 @@ static void floppy_shutdown(struct work_struct *arg)
unsigned long flags;
if (initialized)
- show_floppy();
+ show_floppy(current_fdc);
cancel_activity();
flags = claim_dma_lock();
@@ -1934,7 +1938,7 @@ static void floppy_ready(void)
"calling disk change from floppy_ready\n");
if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) &&
disk_change(current_drive) && !drive_params[current_drive].select_delay)
- twaddle(); /* this clears the dcl on certain
+ twaddle(current_fdc, current_drive); /* this clears the dcl on certain
* drive/controller combinations */
#ifdef fd_chose_dma_mode
@@ -1946,20 +1950,20 @@ static void floppy_ready(void)
#endif
if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) {
- perpendicular_mode();
- fdc_specify(); /* must be done here because of hut, hlt ... */
+ perpendicular_mode(current_fdc);
+ fdc_specify(current_fdc, current_drive); /* must be done here because of hut, hlt ... */
seek_floppy();
} else {
if ((raw_cmd->flags & FD_RAW_READ) ||
(raw_cmd->flags & FD_RAW_WRITE))
- fdc_specify();
+ fdc_specify(current_fdc, current_drive);
setup_rw_floppy();
}
}
static void floppy_start(void)
{
- reschedule_timeout(current_reqD, "floppy start");
+ reschedule_timeout(current_drive, "floppy start");
scandrives();
debug_dcl(drive_params[current_drive].flags,
@@ -2004,6 +2008,9 @@ static const struct cont_t intr_cont = {
.done = (done_f)empty
};
+/* schedules handler, waiting for completion. May be interrupted, will then
+ * return -EINTR, in which case the driver will automatically be unlocked.
+ */
static int wait_til_done(void (*handler)(void), bool interruptible)
{
int ret;
@@ -2059,18 +2066,19 @@ static void success_and_wakeup(void)
* ==========================
*/
-static int next_valid_format(void)
+static int next_valid_format(int drive)
{
int probed_format;
- probed_format = drive_state[current_drive].probed_format;
+ probed_format = drive_state[drive].probed_format;
while (1) {
- if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) {
- drive_state[current_drive].probed_format = 0;
+ if (probed_format >= FD_AUTODETECT_SIZE ||
+ !drive_params[drive].autodetect[probed_format]) {
+ drive_state[drive].probed_format = 0;
return 1;
}
- if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) {
- drive_state[current_drive].probed_format = probed_format;
+ if (floppy_type[drive_params[drive].autodetect[probed_format]].sect) {
+ drive_state[drive].probed_format = probed_format;
return 0;
}
probed_format++;
@@ -2083,7 +2091,7 @@ static void bad_flp_intr(void)
if (probing) {
drive_state[current_drive].probed_format++;
- if (!next_valid_format())
+ if (!next_valid_format(current_drive))
return;
}
err_count = ++(*errors);
@@ -2843,6 +2851,9 @@ static int set_next_request(void)
return current_req != NULL;
}
+/* Starts or continues processing request. Will automatically unlock the
+ * driver at end of request.
+ */
static void redo_fd_request(void)
{
int drive;
@@ -2867,7 +2878,7 @@ do_request:
}
drive = (long)current_req->rq_disk->private_data;
set_fdc(drive);
- reschedule_timeout(current_reqD, "redo fd request");
+ reschedule_timeout(current_drive, "redo fd request");
set_floppy(drive);
raw_cmd = &default_raw_cmd;
@@ -2885,7 +2896,7 @@ do_request:
if (!_floppy) { /* Autodetection */
if (!probing) {
drive_state[current_drive].probed_format = 0;
- if (next_valid_format()) {
+ if (next_valid_format(current_drive)) {
DPRINT("no autodetectable formats\n");
_floppy = NULL;
request_done(0);
@@ -2904,7 +2915,7 @@ do_request:
}
if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags))
- twaddle();
+ twaddle(current_fdc, current_drive);
schedule_bh(floppy_start);
debugt(__func__, "queue fd request");
return;
@@ -2917,6 +2928,7 @@ static const struct cont_t rw_cont = {
.done = request_done
};
+/* schedule the request and automatically unlock the driver on completion */
static void process_fd_request(void)
{
cont = &rw_cont;
@@ -2938,17 +2950,17 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx,
(unsigned long long) current_req->cmd_flags))
return BLK_STS_IOERR;
- spin_lock_irq(&floppy_lock);
- list_add_tail(&bd->rq->queuelist, &floppy_reqs);
- spin_unlock_irq(&floppy_lock);
-
if (test_and_set_bit(0, &fdc_busy)) {
/* fdc busy, this new request will be treated when the
current one is done */
is_alive(__func__, "old request running");
- return BLK_STS_OK;
+ return BLK_STS_RESOURCE;
}
+ spin_lock_irq(&floppy_lock);
+ list_add_tail(&bd->rq->queuelist, &floppy_reqs);
+ spin_unlock_irq(&floppy_lock);
+
command_status = FD_COMMAND_NONE;
__reschedule_timeout(MAXTIMEOUT, "fd_request");
set_fdc(0);
@@ -2996,6 +3008,10 @@ static const struct cont_t reset_cont = {
.done = generic_done
};
+/*
+ * Resets the FDC connected to drive <drive>.
+ * Both current_drive and current_fdc are changed to match the new drive.
+ */
static int user_reset_fdc(int drive, int arg, bool interruptible)
{
int ret;
@@ -3006,6 +3022,9 @@ static int user_reset_fdc(int drive, int arg, bool interruptible)
if (arg == FD_RESET_ALWAYS)
fdc_state[current_fdc].reset = 1;
if (fdc_state[current_fdc].reset) {
+ /* note: reset_fdc will take care of unlocking the driver
+ * on completion.
+ */
cont = &reset_cont;
ret = wait_til_done(reset_fdc, interruptible);
if (ret == -EINTR)
@@ -3059,7 +3078,7 @@ static void raw_cmd_done(int flag)
raw_cmd->flags |= FD_RAW_HARDFAILURE;
} else {
raw_cmd->reply_count = inr;
- if (raw_cmd->reply_count > MAX_REPLIES)
+ if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE)
raw_cmd->reply_count = 0;
for (i = 0; i < raw_cmd->reply_count; i++)
raw_cmd->reply[i] = reply_buffer[i];
@@ -3170,18 +3189,10 @@ loop:
if (ret)
return -EFAULT;
param += sizeof(struct floppy_raw_cmd);
- if (ptr->cmd_count > 33)
- /* the command may now also take up the space
- * initially intended for the reply & the
- * reply count. Needed for long 82078 commands
- * such as RESTORE, which takes ... 17 command
- * bytes. Murphy's law #137: When you reserve
- * 16 bytes for a structure, you'll one day
- * discover that you really need 17...
- */
+ if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE)
return -EINVAL;
- for (i = 0; i < 16; i++)
+ for (i = 0; i < FD_RAW_REPLY_SIZE; i++)
ptr->reply[i] = 0;
ptr->resultcode = 0;
@@ -3423,13 +3434,13 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return 0;
}
-static bool valid_floppy_drive_params(const short autodetect[8],
+static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE],
int native_format)
{
size_t floppy_type_size = ARRAY_SIZE(floppy_type);
size_t i = 0;
- for (i = 0; i < 8; ++i) {
+ for (i = 0; i < FD_AUTODETECT_SIZE; ++i) {
if (autodetect[i] < 0 ||
autodetect[i] >= floppy_type_size)
return false;
@@ -3610,7 +3621,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int
case FDTWADDLE:
if (lock_fdc(drive))
return -EINTR;
- twaddle();
+ twaddle(current_fdc, current_drive);
process_fd_request();
return 0;
default:
@@ -3654,7 +3665,7 @@ struct compat_floppy_drive_params {
struct floppy_max_errors max_errors;
char flags;
char read_track;
- short autodetect[8];
+ short autodetect[FD_AUTODETECT_SIZE];
compat_int_t checkfreq;
compat_int_t native_format;
};
@@ -4298,79 +4309,79 @@ static const struct block_device_operations floppy_fops = {
/* Determine the floppy disk controller type */
/* This routine was written by David C. Niemi */
-static char __init get_fdc_version(void)
+static char __init get_fdc_version(int fdc)
{
int r;
- output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */
- if (fdc_state[current_fdc].reset)
+ output_byte(fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */
+ if (fdc_state[fdc].reset)
return FDC_NONE;
- r = result();
+ r = result(fdc);
if (r <= 0x00)
return FDC_NONE; /* No FDC present ??? */
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is an 8272A\n", current_fdc);
+ pr_info("FDC %d is an 8272A\n", fdc);
return FDC_8272A; /* 8272a/765 don't know DUMPREGS */
}
if (r != 10) {
pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
- if (!fdc_configure()) {
- pr_info("FDC %d is an 82072\n", current_fdc);
+ if (!fdc_configure(fdc)) {
+ pr_info("FDC %d is an 82072\n", fdc);
return FDC_82072; /* 82072 doesn't know CONFIGURE */
}
- output_byte(FD_PERPENDICULAR);
- if (need_more_output() == MORE_OUTPUT) {
- output_byte(0);
+ output_byte(fdc, FD_PERPENDICULAR);
+ if (need_more_output(fdc) == MORE_OUTPUT) {
+ output_byte(fdc, 0);
} else {
- pr_info("FDC %d is an 82072A\n", current_fdc);
+ pr_info("FDC %d is an 82072A\n", fdc);
return FDC_82072A; /* 82072A as found on Sparcs. */
}
- output_byte(FD_UNLOCK);
- r = result();
+ output_byte(fdc, FD_UNLOCK);
+ r = result(fdc);
if ((r == 1) && (reply_buffer[0] == 0x80)) {
- pr_info("FDC %d is a pre-1991 82077\n", current_fdc);
+ pr_info("FDC %d is a pre-1991 82077\n", fdc);
return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know
* LOCK/UNLOCK */
}
if ((r != 1) || (reply_buffer[0] != 0x00)) {
pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
- output_byte(FD_PARTID);
- r = result();
+ output_byte(fdc, FD_PARTID);
+ r = result(fdc);
if (r != 1) {
pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n",
- current_fdc, r);
+ fdc, r);
return FDC_UNKNOWN;
}
if (reply_buffer[0] == 0x80) {
- pr_info("FDC %d is a post-1991 82077\n", current_fdc);
+ pr_info("FDC %d is a post-1991 82077\n", fdc);
return FDC_82077; /* Revised 82077AA passes all the tests */
}
switch (reply_buffer[0] >> 5) {
case 0x0:
/* Either a 82078-1 or a 82078SL running at 5Volt */
- pr_info("FDC %d is an 82078.\n", current_fdc);
+ pr_info("FDC %d is an 82078.\n", fdc);
return FDC_82078;
case 0x1:
- pr_info("FDC %d is a 44pin 82078\n", current_fdc);
+ pr_info("FDC %d is a 44pin 82078\n", fdc);
return FDC_82078;
case 0x2:
- pr_info("FDC %d is a S82078B\n", current_fdc);
+ pr_info("FDC %d is a S82078B\n", fdc);
return FDC_S82078B;
case 0x3:
- pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc);
+ pr_info("FDC %d is a National Semiconductor PC87306\n", fdc);
return FDC_87306;
default:
pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n",
- current_fdc, reply_buffer[0] >> 5);
+ fdc, reply_buffer[0] >> 5);
return FDC_82078_UNKN;
}
} /* get_fdc_version */
@@ -4534,11 +4545,13 @@ static void floppy_device_release(struct device *dev)
static int floppy_resume(struct device *dev)
{
int fdc;
+ int saved_drive;
+ saved_drive = current_drive;
for (fdc = 0; fdc < N_FDC; fdc++)
if (fdc_state[fdc].address != -1)
- user_reset_fdc(-1, FD_RESET_ALWAYS, false);
-
+ user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false);
+ set_fdc(saved_drive);
return 0;
}
@@ -4646,16 +4659,15 @@ static int __init do_floppy_init(void)
config_types();
for (i = 0; i < N_FDC; i++) {
- current_fdc = i;
- memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state));
- fdc_state[current_fdc].dtr = -1;
- fdc_state[current_fdc].dor = 0x4;
+ memset(&fdc_state[i], 0, sizeof(*fdc_state));
+ fdc_state[i].dtr = -1;
+ fdc_state[i].dor = 0x4;
#if defined(__sparc__) || defined(__mc68000__)
/*sparcs/sun3x don't have a DOR reset which we can fall back on to */
#ifdef __mc68000__
if (MACH_IS_SUN3X)
#endif
- fdc_state[current_fdc].version = FDC_82072A;
+ fdc_state[i].version = FDC_82072A;
#endif
}
@@ -4697,30 +4709,29 @@ static int __init do_floppy_init(void)
msleep(10);
for (i = 0; i < N_FDC; i++) {
- current_fdc = i;
- fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION;
+ fdc_state[i].driver_version = FD_DRIVER_VERSION;
for (unit = 0; unit < 4; unit++)
- fdc_state[current_fdc].track[unit] = 0;
- if (fdc_state[current_fdc].address == -1)
+ fdc_state[i].track[unit] = 0;
+ if (fdc_state[i].address == -1)
continue;
- fdc_state[current_fdc].rawcmd = 2;
- if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) {
+ fdc_state[i].rawcmd = 2;
+ if (user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false)) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(current_fdc);
- fdc_state[current_fdc].address = -1;
- fdc_state[current_fdc].version = FDC_NONE;
+ floppy_release_regions(i);
+ fdc_state[i].address = -1;
+ fdc_state[i].version = FDC_NONE;
continue;
}
/* Try to determine the floppy controller type */
- fdc_state[current_fdc].version = get_fdc_version();
- if (fdc_state[current_fdc].version == FDC_NONE) {
+ fdc_state[i].version = get_fdc_version(i);
+ if (fdc_state[i].version == FDC_NONE) {
/* free ioports reserved by floppy_grab_irq_and_dma() */
- floppy_release_regions(current_fdc);
- fdc_state[current_fdc].address = -1;
+ floppy_release_regions(i);
+ fdc_state[i].address = -1;
continue;
}
if (can_use_virtual_dma == 2 &&
- fdc_state[current_fdc].version < FDC_82072A)
+ fdc_state[i].version < FDC_82072A)
can_use_virtual_dma = 0;
have_no_fdc = 0;
@@ -4728,7 +4739,7 @@ static int __init do_floppy_init(void)
* properly, so force a reset for the standard FDC clones,
* to avoid interrupt garbage.
*/
- user_reset_fdc(-1, FD_RESET_ALWAYS, false);
+ user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false);
}
current_fdc = 0;
cancel_delayed_work(&fd_timeout);
@@ -4855,6 +4866,8 @@ static void floppy_release_regions(int fdc)
static int floppy_grab_irq_and_dma(void)
{
+ int fdc;
+
if (atomic_inc_return(&usage_count) > 1)
return 0;
@@ -4882,24 +4895,24 @@ static int floppy_grab_irq_and_dma(void)
}
}
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
- if (fdc_state[current_fdc].address != -1) {
- if (floppy_request_regions(current_fdc))
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (fdc_state[fdc].address != -1) {
+ if (floppy_request_regions(fdc))
goto cleanup;
}
}
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) {
- if (fdc_state[current_fdc].address != -1) {
- reset_fdc_info(1);
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ for (fdc = 0; fdc < N_FDC; fdc++) {
+ if (fdc_state[fdc].address != -1) {
+ reset_fdc_info(fdc, 1);
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
}
}
- current_fdc = 0;
+
set_dor(0, ~0, 8); /* avoid immediate interrupt */
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
- if (fdc_state[current_fdc].address != -1)
- fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR);
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (fdc_state[fdc].address != -1)
+ fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR);
/*
* The driver will try and free resources and relies on us
* to know if they were allocated or not.
@@ -4910,15 +4923,16 @@ static int floppy_grab_irq_and_dma(void)
cleanup:
fd_free_irq();
fd_free_dma();
- while (--current_fdc >= 0)
- floppy_release_regions(current_fdc);
+ while (--fdc >= 0)
+ floppy_release_regions(fdc);
+ current_fdc = 0;
atomic_dec(&usage_count);
return -1;
}
static void floppy_release_irq_and_dma(void)
{
- int old_fdc;
+ int fdc;
#ifndef __sparc__
int drive;
#endif
@@ -4959,11 +4973,9 @@ static void floppy_release_irq_and_dma(void)
pr_info("auxiliary floppy timer still active\n");
if (work_pending(&floppy_work))
pr_info("work still pending\n");
- old_fdc = current_fdc;
- for (current_fdc = 0; current_fdc < N_FDC; current_fdc++)
- if (fdc_state[current_fdc].address != -1)
- floppy_release_regions(current_fdc);
- current_fdc = old_fdc;
+ for (fdc = 0; fdc < N_FDC; fdc++)
+ if (fdc_state[fdc].address != -1)
+ floppy_release_regions(fdc);
}
#ifdef MODULE
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 13dbe2f16882..4212288ab157 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -228,26 +228,36 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
blk_mq_unfreeze_queue(lo->lo_queue);
}
+/**
+ * loop_validate_block_size() - validates the passed in block size
+ * @bsize: size to validate
+ */
static int
-figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit)
+loop_validate_block_size(unsigned short bsize)
{
- loff_t size = get_size(offset, sizelimit, lo->lo_backing_file);
- sector_t x = (sector_t)size;
- struct block_device *bdev = lo->lo_device;
+ if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize))
+ return -EINVAL;
- if (unlikely((loff_t)x != size))
- return -EFBIG;
- if (lo->lo_offset != offset)
- lo->lo_offset = offset;
- if (lo->lo_sizelimit != sizelimit)
- lo->lo_sizelimit = sizelimit;
- set_capacity(lo->lo_disk, x);
- bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
return 0;
}
+/**
+ * loop_set_size() - sets device size and notifies userspace
+ * @lo: struct loop_device to set the size for
+ * @size: new size of the loop device
+ *
+ * Callers must validate that the size passed into this function fits into
+ * a sector_t, eg using loop_validate_size()
+ */
+static void loop_set_size(struct loop_device *lo, loff_t size)
+{
+ struct block_device *bdev = lo->lo_device;
+
+ bd_set_size(bdev, size << SECTOR_SHIFT);
+
+ set_capacity_revalidate_and_notify(lo->lo_disk, size, false);
+}
+
static inline int
lo_do_transfer(struct loop_device *lo, int cmd,
struct page *rpage, unsigned roffs,
@@ -952,23 +962,125 @@ static void loop_update_rotational(struct loop_device *lo)
blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
}
-static int loop_set_fd(struct loop_device *lo, fmode_t mode,
- struct block_device *bdev, unsigned int arg)
+static int
+loop_release_xfer(struct loop_device *lo)
+{
+ int err = 0;
+ struct loop_func_table *xfer = lo->lo_encryption;
+
+ if (xfer) {
+ if (xfer->release)
+ err = xfer->release(lo);
+ lo->transfer = NULL;
+ lo->lo_encryption = NULL;
+ module_put(xfer->owner);
+ }
+ return err;
+}
+
+static int
+loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
+ const struct loop_info64 *i)
+{
+ int err = 0;
+
+ if (xfer) {
+ struct module *owner = xfer->owner;
+
+ if (!try_module_get(owner))
+ return -EINVAL;
+ if (xfer->init)
+ err = xfer->init(lo, i);
+ if (err)
+ module_put(owner);
+ else
+ lo->lo_encryption = xfer;
+ }
+ return err;
+}
+
+/**
+ * loop_set_status_from_info - configure device from loop_info
+ * @lo: struct loop_device to configure
+ * @info: struct loop_info64 to configure the device with
+ *
+ * Configures the loop device parameters according to the passed
+ * in loop_info64 configuration.
+ */
+static int
+loop_set_status_from_info(struct loop_device *lo,
+ const struct loop_info64 *info)
+{
+ int err;
+ struct loop_func_table *xfer;
+ kuid_t uid = current_uid();
+
+ if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE)
+ return -EINVAL;
+
+ err = loop_release_xfer(lo);
+ if (err)
+ return err;
+
+ if (info->lo_encrypt_type) {
+ unsigned int type = info->lo_encrypt_type;
+
+ if (type >= MAX_LO_CRYPT)
+ return -EINVAL;
+ xfer = xfer_funcs[type];
+ if (xfer == NULL)
+ return -EINVAL;
+ } else
+ xfer = NULL;
+
+ err = loop_init_xfer(lo, xfer, info);
+ if (err)
+ return err;
+
+ lo->lo_offset = info->lo_offset;
+ lo->lo_sizelimit = info->lo_sizelimit;
+ memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
+ memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
+ lo->lo_file_name[LO_NAME_SIZE-1] = 0;
+ lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
+
+ if (!xfer)
+ xfer = &none_funcs;
+ lo->transfer = xfer->transfer;
+ lo->ioctl = xfer->ioctl;
+
+ lo->lo_flags = info->lo_flags;
+
+ lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
+ lo->lo_init[0] = info->lo_init[0];
+ lo->lo_init[1] = info->lo_init[1];
+ if (info->lo_encrypt_key_size) {
+ memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
+ info->lo_encrypt_key_size);
+ lo->lo_key_owner = uid;
+ }
+
+ return 0;
+}
+
+static int loop_configure(struct loop_device *lo, fmode_t mode,
+ struct block_device *bdev,
+ const struct loop_config *config)
{
struct file *file;
struct inode *inode;
struct address_space *mapping;
struct block_device *claimed_bdev = NULL;
- int lo_flags = 0;
int error;
loff_t size;
bool partscan;
+ unsigned short bsize;
/* This is safe, since we have a reference from open(). */
__module_get(THIS_MODULE);
error = -EBADF;
- file = fget(arg);
+ file = fget(config->fd);
if (!file)
goto out;
@@ -977,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
* here to avoid changing device under exclusive owner.
*/
if (!(mode & FMODE_EXCL)) {
- claimed_bdev = bd_start_claiming(bdev, loop_set_fd);
+ claimed_bdev = bd_start_claiming(bdev, loop_configure);
if (IS_ERR(claimed_bdev)) {
error = PTR_ERR(claimed_bdev);
goto out_putf;
@@ -999,52 +1111,58 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
mapping = file->f_mapping;
inode = mapping->host;
+ size = get_loop_size(lo, file);
+
+ if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
+ error = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (config->block_size) {
+ error = loop_validate_block_size(config->block_size);
+ if (error)
+ goto out_unlock;
+ }
+
+ error = loop_set_status_from_info(lo, &config->info);
+ if (error)
+ goto out_unlock;
+
if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) ||
!file->f_op->write_iter)
- lo_flags |= LO_FLAGS_READ_ONLY;
+ lo->lo_flags |= LO_FLAGS_READ_ONLY;
- error = -EFBIG;
- size = get_loop_size(lo, file);
- if ((loff_t)(sector_t)size != size)
- goto out_unlock;
error = loop_prepare_queue(lo);
if (error)
goto out_unlock;
- error = 0;
-
- set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0);
+ set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0);
- lo->use_dio = false;
+ lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO;
lo->lo_device = bdev;
- lo->lo_flags = lo_flags;
lo->lo_backing_file = file;
- lo->transfer = NULL;
- lo->ioctl = NULL;
- lo->lo_sizelimit = 0;
lo->old_gfp_mask = mapping_gfp_mask(mapping);
mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS));
- if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
+ if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync)
blk_queue_write_cache(lo->lo_queue, true, false);
- if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) {
+ if (config->block_size)
+ bsize = config->block_size;
+ else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev)
/* In case of direct I/O, match underlying block size */
- unsigned short bsize = bdev_logical_block_size(
- inode->i_sb->s_bdev);
+ bsize = bdev_logical_block_size(inode->i_sb->s_bdev);
+ else
+ bsize = 512;
- blk_queue_logical_block_size(lo->lo_queue, bsize);
- blk_queue_physical_block_size(lo->lo_queue, bsize);
- blk_queue_io_min(lo->lo_queue, bsize);
- }
+ blk_queue_logical_block_size(lo->lo_queue, bsize);
+ blk_queue_physical_block_size(lo->lo_queue, bsize);
+ blk_queue_io_min(lo->lo_queue, bsize);
loop_update_rotational(lo);
loop_update_dio(lo);
- set_capacity(lo->lo_disk, size);
- bd_set_size(bdev, size << 9);
loop_sysfs_init(lo);
- /* let user-space know about the new size */
- kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE);
+ loop_set_size(lo, size);
set_blocksize(bdev, S_ISBLK(inode->i_mode) ?
block_size(inode->i_bdev) : PAGE_SIZE);
@@ -1062,14 +1180,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode,
if (partscan)
loop_reread_partitions(lo, bdev);
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
return 0;
out_unlock:
mutex_unlock(&loop_ctl_mutex);
out_bdev:
if (claimed_bdev)
- bd_abort_claiming(bdev, claimed_bdev, loop_set_fd);
+ bd_abort_claiming(bdev, claimed_bdev, loop_configure);
out_putf:
fput(file);
out:
@@ -1078,43 +1196,6 @@ out:
return error;
}
-static int
-loop_release_xfer(struct loop_device *lo)
-{
- int err = 0;
- struct loop_func_table *xfer = lo->lo_encryption;
-
- if (xfer) {
- if (xfer->release)
- err = xfer->release(lo);
- lo->transfer = NULL;
- lo->lo_encryption = NULL;
- module_put(xfer->owner);
- }
- return err;
-}
-
-static int
-loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer,
- const struct loop_info64 *i)
-{
- int err = 0;
-
- if (xfer) {
- struct module *owner = xfer->owner;
-
- if (!try_module_get(owner))
- return -EINVAL;
- if (xfer->init)
- err = xfer->init(lo, i);
- if (err)
- module_put(owner);
- else
- lo->lo_encryption = xfer;
- }
- return err;
-}
-
static int __loop_clr_fd(struct loop_device *lo, bool release)
{
struct file *filp = NULL;
@@ -1263,10 +1344,11 @@ static int
loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
{
int err;
- struct loop_func_table *xfer;
- kuid_t uid = current_uid();
struct block_device *bdev;
+ kuid_t uid = current_uid();
+ int prev_lo_flags;
bool partscan = false;
+ bool size_changed = false;
err = mutex_lock_killable(&loop_ctl_mutex);
if (err)
@@ -1281,13 +1363,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
err = -ENXIO;
goto out_unlock;
}
- if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) {
- err = -EINVAL;
- goto out_unlock;
- }
if (lo->lo_offset != info->lo_offset ||
lo->lo_sizelimit != info->lo_sizelimit) {
+ size_changed = true;
sync_blockdev(lo->lo_device);
kill_bdev(lo->lo_device);
}
@@ -1295,79 +1374,44 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
/* I/O need to be drained during transfer transition */
blk_mq_freeze_queue(lo->lo_queue);
- err = loop_release_xfer(lo);
- if (err)
+ if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) {
+ /* If any pages were dirtied after kill_bdev(), try again */
+ err = -EAGAIN;
+ pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
+ __func__, lo->lo_number, lo->lo_file_name,
+ lo->lo_device->bd_inode->i_mapping->nrpages);
goto out_unfreeze;
+ }
- if (info->lo_encrypt_type) {
- unsigned int type = info->lo_encrypt_type;
-
- if (type >= MAX_LO_CRYPT) {
- err = -EINVAL;
- goto out_unfreeze;
- }
- xfer = xfer_funcs[type];
- if (xfer == NULL) {
- err = -EINVAL;
- goto out_unfreeze;
- }
- } else
- xfer = NULL;
+ prev_lo_flags = lo->lo_flags;
- err = loop_init_xfer(lo, xfer, info);
+ err = loop_set_status_from_info(lo, info);
if (err)
goto out_unfreeze;
- if (lo->lo_offset != info->lo_offset ||
- lo->lo_sizelimit != info->lo_sizelimit) {
- /* kill_bdev should have truncated all the pages */
- if (lo->lo_device->bd_inode->i_mapping->nrpages) {
- err = -EAGAIN;
- pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n",
- __func__, lo->lo_number, lo->lo_file_name,
- lo->lo_device->bd_inode->i_mapping->nrpages);
- goto out_unfreeze;
- }
- if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) {
- err = -EFBIG;
- goto out_unfreeze;
- }
+ /* Mask out flags that can't be set using LOOP_SET_STATUS. */
+ lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For those flags, use the previous values instead */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+ /* For flags that can't be cleared, use previous values too */
+ lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS;
+
+ if (size_changed) {
+ loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit,
+ lo->lo_backing_file);
+ loop_set_size(lo, new_size);
}
loop_config_discard(lo);
- memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE);
- memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE);
- lo->lo_file_name[LO_NAME_SIZE-1] = 0;
- lo->lo_crypt_name[LO_NAME_SIZE-1] = 0;
-
- if (!xfer)
- xfer = &none_funcs;
- lo->transfer = xfer->transfer;
- lo->ioctl = xfer->ioctl;
-
- if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) !=
- (info->lo_flags & LO_FLAGS_AUTOCLEAR))
- lo->lo_flags ^= LO_FLAGS_AUTOCLEAR;
-
- lo->lo_encrypt_key_size = info->lo_encrypt_key_size;
- lo->lo_init[0] = info->lo_init[0];
- lo->lo_init[1] = info->lo_init[1];
- if (info->lo_encrypt_key_size) {
- memcpy(lo->lo_encrypt_key, info->lo_encrypt_key,
- info->lo_encrypt_key_size);
- lo->lo_key_owner = uid;
- }
-
/* update dio if lo_offset or transfer is changed */
__loop_update_dio(lo, lo->use_dio);
out_unfreeze:
blk_mq_unfreeze_queue(lo->lo_queue);
- if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) &&
- !(lo->lo_flags & LO_FLAGS_PARTSCAN)) {
- lo->lo_flags |= LO_FLAGS_PARTSCAN;
+ if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) &&
+ !(prev_lo_flags & LO_FLAGS_PARTSCAN)) {
lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN;
bdev = lo->lo_device;
partscan = true;
@@ -1531,10 +1575,15 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) {
static int loop_set_capacity(struct loop_device *lo)
{
+ loff_t size;
+
if (unlikely(lo->lo_state != Lo_bound))
return -ENXIO;
- return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit);
+ size = get_loop_size(lo, lo->lo_backing_file);
+ loop_set_size(lo, size);
+
+ return 0;
}
static int loop_set_dio(struct loop_device *lo, unsigned long arg)
@@ -1558,8 +1607,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg)
if (lo->lo_state != Lo_bound)
return -ENXIO;
- if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg))
- return -EINVAL;
+ err = loop_validate_block_size(arg);
+ if (err)
+ return err;
if (lo->lo_queue->limits.logical_block_size == arg)
return 0;
@@ -1617,11 +1667,31 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
struct loop_device *lo = bdev->bd_disk->private_data;
+ void __user *argp = (void __user *) arg;
int err;
switch (cmd) {
- case LOOP_SET_FD:
- return loop_set_fd(lo, mode, bdev, arg);
+ case LOOP_SET_FD: {
+ /*
+ * Legacy case - pass in a zeroed out struct loop_config with
+ * only the file descriptor set , which corresponds with the
+ * default parameters we'd have used otherwise.
+ */
+ struct loop_config config;
+
+ memset(&config, 0, sizeof(config));
+ config.fd = arg;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
+ case LOOP_CONFIGURE: {
+ struct loop_config config;
+
+ if (copy_from_user(&config, argp, sizeof(config)))
+ return -EFAULT;
+
+ return loop_configure(lo, mode, bdev, &config);
+ }
case LOOP_CHANGE_FD:
return loop_change_fd(lo, bdev, arg);
case LOOP_CLR_FD:
@@ -1629,21 +1699,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_SET_STATUS:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status_old(lo,
- (struct loop_info __user *)arg);
+ err = loop_set_status_old(lo, argp);
}
break;
case LOOP_GET_STATUS:
- return loop_get_status_old(lo, (struct loop_info __user *) arg);
+ return loop_get_status_old(lo, argp);
case LOOP_SET_STATUS64:
err = -EPERM;
if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) {
- err = loop_set_status64(lo,
- (struct loop_info64 __user *) arg);
+ err = loop_set_status64(lo, argp);
}
break;
case LOOP_GET_STATUS64:
- return loop_get_status64(lo, (struct loop_info64 __user *) arg);
+ return loop_get_status64(lo, argp);
case LOOP_SET_CAPACITY:
case LOOP_SET_DIRECT_IO:
case LOOP_SET_BLOCK_SIZE:
@@ -1795,6 +1863,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode,
case LOOP_CLR_FD:
case LOOP_GET_STATUS64:
case LOOP_SET_STATUS64:
+ case LOOP_CONFIGURE:
arg = (unsigned long) compat_ptr(arg);
/* fall through */
case LOOP_SET_FD:
diff --git a/drivers/block/swim.c b/drivers/block/swim.c
index 4c297f69171d..dd34504382e5 100644
--- a/drivers/block/swim.c
+++ b/drivers/block/swim.c
@@ -327,7 +327,7 @@ static inline void swim_motor(struct swim __iomem *base,
swim_select(base, RELAX);
if (swim_readbit(base, MOTOR_ON))
break;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
} else if (action == OFF) {
@@ -346,7 +346,7 @@ static inline void swim_eject(struct swim __iomem *base)
swim_select(base, RELAX);
if (!swim_readbit(base, DISK_IN))
break;
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
}
swim_select(base, RELAX);
@@ -370,7 +370,7 @@ static inline int swim_step(struct swim __iomem *base)
for (wait = 0; wait < HZ; wait++) {
- current->state = TASK_INTERRUPTIBLE;
+ set_current_state(TASK_INTERRUPTIBLE);
schedule_timeout(1);
swim_select(base, RELAX);
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 6dfa653d30db..bf7dd96db9b3 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG
Keeps all active closures in a linked list and provides a debugfs
interface to list them, which makes it possible to see asynchronous
operations that get stuck.
+
+config BCACHE_ASYNC_REGISTRAION
+ bool "Asynchronous device registration (EXPERIMENTAL)"
+ depends on BCACHE
+ help
+ Add a sysfs file /sys/fs/bcache/register_async. Writing registering
+ device path into this file will returns immediately and the real
+ registration work is handled in kernel work queue in asynchronous
+ way.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 74a9849ea164..221e0191b687 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -176,7 +176,7 @@
* - updates to non leaf nodes just happen synchronously (see btree_split()).
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include <linux/bcache.h>
#include <linux/bio.h>
diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c
index 4385303836d8..4995fcaefe29 100644
--- a/drivers/md/bcache/bset.c
+++ b/drivers/md/bcache/bset.c
@@ -6,7 +6,7 @@
* Copyright 2012 Google, Inc.
*/
-#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__
+#define pr_fmt(fmt) "bcache: %s() " fmt, __func__
#include "util.h"
#include "bset.h"
@@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set)
if (b->ops->key_dump)
b->ops->key_dump(b, k);
else
- pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
+ pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k));
if (next < bset_bkey_last(i) &&
bkey_cmp(k, b->ops->is_extents ?
@@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0;
- pr_debug("sorted %i keys", out->keys);
+ pr_debug("sorted %i keys\n", out->keys);
}
static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 72856e5f23a3..39de94edd73a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -619,7 +619,7 @@ retry:
* and BTREE_NODE_journal_flush bit cleared by btree_flush_write().
*/
if (btree_node_journal_flush(b)) {
- pr_debug("bnode %p is flushing by journal, retry", b);
+ pr_debug("bnode %p is flushing by journal, retry\n", b);
mutex_unlock(&b->write_lock);
udelay(1);
goto retry;
@@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c)
c->shrink.batch = c->btree_pages * 2;
if (register_shrinker(&c->shrink))
- pr_warn("bcache: %s: could not register shrinker",
+ pr_warn("bcache: %s: could not register shrinker\n",
__func__);
return 0;
@@ -1054,7 +1054,7 @@ retry:
*/
if (btree_node_journal_flush(b)) {
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p journal_flush set, retry", b);
+ pr_debug("bnode %p journal_flush set, retry\n", b);
udelay(1);
goto retry;
}
@@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c)
schedule_timeout_interruptible(msecs_to_jiffies
(GC_SLEEP_MS));
else if (ret)
- pr_warn("gc failed!");
+ pr_warn("gc failed!\n");
} while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags));
bch_btree_gc_finish(c);
@@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg)
struct btree_iter iter;
struct bkey *k, *p;
int cur_idx, prev_idx, skip_nr;
- int i, n;
k = p = NULL;
- i = n = 0;
cur_idx = prev_idx = 0;
ret = 0;
@@ -2045,7 +2043,7 @@ int bch_btree_check(struct cache_set *c)
&check_state->infos[i],
name);
if (IS_ERR(check_state->infos[i].thread)) {
- pr_err("fails to run thread bch_btrchk[%d]", i);
+ pr_err("fails to run thread bch_btrchk[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(check_state->infos[i].thread);
ret = -ENOMEM;
@@ -2456,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys,
if (ret) {
struct bkey *k;
- pr_err("error %i", ret);
+ pr_err("error %i\n", ret);
while ((k = bch_keylist_pop(keys)))
bkey_put(c, k);
@@ -2744,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c,
break;
if (bkey_cmp(&buf->last_scanned, end) >= 0) {
- pr_debug("scan finished");
+ pr_debug("scan finished\n");
break;
}
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 886710043025..9162af5bb6ec 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k)
char buf[80];
bch_extent_to_text(buf, sizeof(buf), k);
- pr_err(" %s", buf);
+ pr_cont(" %s", buf);
for (j = 0; j < KEY_PTRS(k); j++) {
size_t n = PTR_BUCKET_NR(b->c, k, j);
- pr_err(" bucket %zu", n);
+ pr_cont(" bucket %zu", n);
if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets)
- pr_err(" prio %i",
- PTR_BUCKET(b->c, k, j)->prio);
+ pr_cont(" prio %i",
+ PTR_BUCKET(b->c, k, j)->prio);
}
- pr_err(" %s\n", bch_ptr_status(b->c, k));
+ pr_cont(" %s\n", bch_ptr_status(b->c, k));
}
/* Btree ptrs */
@@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k)
if (stale && KEY_DIRTY(k)) {
bch_extent_to_text(buf, sizeof(buf), k);
- pr_info("stale dirty pointer, stale %u, key: %s",
+ pr_info("stale dirty pointer, stale %u, key: %s\n",
stale, buf);
}
diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
index 4d93f07f63e5..b25ee33b0d0b 100644
--- a/drivers/md/bcache/io.c
+++ b/drivers/md/bcache/io.c
@@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
* we shouldn't count failed REQ_RAHEAD bio to dc->io_errors.
*/
if (bio->bi_opf & REQ_RAHEAD) {
- pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore",
+ pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n",
dc->backing_dev_name);
return;
}
errors = atomic_add_return(1, &dc->io_errors);
if (errors < dc->error_limit)
- pr_err("%s: IO error on backing device, unrecoverable",
+ pr_err("%s: IO error on backing device, unrecoverable\n",
dc->backing_dev_name);
else
bch_cached_dev_error(dc);
@@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca,
errors >>= IO_ERROR_SHIFT;
if (errors < ca->set->error_limit)
- pr_err("%s: IO error on %s%s",
+ pr_err("%s: IO error on %s%s\n",
ca->cache_dev_name, m,
is_read ? ", recovering." : ".");
else
bch_cache_set_error(ca->set,
- "%s: too many IO errors %s",
+ "%s: too many IO errors %s\n",
ca->cache_dev_name, m);
}
}
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 0e3ff9745ac7..90aac4e2333f 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list,
closure_init_stack(&cl);
- pr_debug("reading %u", bucket_index);
+ pr_debug("reading %u\n", bucket_index);
while (offset < ca->sb.bucket_size) {
reread: left = ca->sb.bucket_size - offset;
@@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset;
size_t blocks, bytes = set_bytes(j);
if (j->magic != jset_magic(&ca->sb)) {
- pr_debug("%u: bad magic", bucket_index);
+ pr_debug("%u: bad magic\n", bucket_index);
return ret;
}
if (bytes > left << 9 ||
bytes > PAGE_SIZE << JSET_BITS) {
- pr_info("%u: too big, %zu bytes, offset %u",
+ pr_info("%u: too big, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset;
goto reread;
if (j->csum != csum_set(j)) {
- pr_info("%u: bad csum, %zu bytes, offset %u",
+ pr_info("%u: bad csum, %zu bytes, offset %u\n",
bucket_index, bytes, offset);
return ret;
}
@@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
uint64_t seq;
bitmap_zero(bitmap, SB_JOURNAL_BUCKETS);
- pr_debug("%u journal buckets", ca->sb.njournal_buckets);
+ pr_debug("%u journal buckets\n", ca->sb.njournal_buckets);
/*
* Read journal buckets ordered by golden ratio hash to quickly
@@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
* If that fails, check all the buckets we haven't checked
* already
*/
- pr_debug("falling back to linear search");
+ pr_debug("falling back to linear search\n");
for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets);
l < ca->sb.njournal_buckets;
@@ -233,7 +233,7 @@ bsearch:
/* Binary search */
m = l;
r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1);
- pr_debug("starting binary search, l %u r %u", l, r);
+ pr_debug("starting binary search, l %u r %u\n", l, r);
while (l + 1 < r) {
seq = list_entry(list->prev, struct journal_replay,
@@ -253,7 +253,7 @@ bsearch:
* Read buckets in reverse order until we stop finding more
* journal entries
*/
- pr_debug("finishing up: m %u njournal_buckets %u",
+ pr_debug("finishing up: m %u njournal_buckets %u\n",
m, ca->sb.njournal_buckets);
l = m;
@@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
if (n != i->j.seq) {
if (n == start && is_discard_enabled(s))
- pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)",
+ pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
else {
- pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)",
+ pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n",
n, i->j.seq - 1, start, end);
ret = -EIO;
goto err;
@@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list)
entries++;
}
- pr_info("journal replay done, %i keys in %i entries, seq %llu",
+ pr_info("journal replay done, %i keys in %i entries, seq %llu\n",
keys, entries, end);
err:
while (!list_empty(list)) {
@@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c)
break;
if (btree_node_journal_flush(b))
- pr_err("BUG: flush_write bit should not be set here!");
+ pr_err("BUG: flush_write bit should not be set here!\n");
mutex_lock(&b->write_lock);
@@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c)
for (i = 0; i < nr; i++) {
b = btree_nodes[i];
if (!b) {
- pr_err("BUG: btree_nodes[%d] is NULL", i);
+ pr_err("BUG: btree_nodes[%d] is NULL\n", i);
continue;
}
/* safe to check without holding b->write_lock */
if (!btree_node_journal_flush(b)) {
- pr_err("BUG: bnode %p: journal_flush bit cleaned", b);
+ pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b);
continue;
}
@@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c)
if (!btree_current_write(b)->journal) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: written by others", b);
+ pr_debug("bnode %p: written by others\n", b);
continue;
}
if (!btree_node_dirty(b)) {
clear_bit(BTREE_NODE_journal_flush, &b->flags);
mutex_unlock(&b->write_lock);
- pr_debug("bnode %p: dirty bit cleaned by others", b);
+ pr_debug("bnode %p: dirty bit cleaned by others\n", b);
continue;
}
@@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j)
j->cur->data->keys = 0;
if (fifo_full(&j->pin))
- pr_debug("journal_pin full (%zu)", fifo_used(&j->pin));
+ pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin));
}
static void journal_write_endio(struct bio *bio)
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 22b483527176..7acf024e99f3 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl)
struct data_insert_op *op = container_of(cl, struct data_insert_op, cl);
struct bio *bio = op->bio;
- pr_debug("invalidating %i sectors from %llu",
+ pr_debug("invalidating %i sectors from %llu\n",
bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector);
while (bio_sectors(bio)) {
@@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio)
if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) ||
bio_sectors(bio) & (c->sb.block_size - 1)) {
- pr_debug("skipping unaligned io");
+ pr_debug("skipping unaligned io\n");
goto skip;
}
@@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio)
*/
if (unlikely(s->iop.writeback &&
bio->bi_opf & REQ_PREFLUSH)) {
- pr_err("Can't flush %s: returned bi_status %i",
+ pr_err("Can't flush %s: returned bi_status %i\n",
dc->backing_dev_name, bio->bi_status);
} else {
/* set to orig_bio->bi_status in bio_complete() */
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index d98354fa28e3..f9975c22bf7e 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
for (i = 0; i < SB_JOURNAL_BUCKETS; i++)
sb->d[i] = le64_to_cpu(s->d[i]);
- pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u",
+ pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n",
sb->version, sb->flags, sb->seq, sb->keys);
err = "Not a bcache superblock (bad offset)";
@@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out,
out->csum = csum_set(out);
- pr_debug("ver %llu, flags %llu, seq %llu",
+ pr_debug("ver %llu, flags %llu, seq %llu\n",
sb->version, sb->flags, sb->seq);
submit_bio(bio);
@@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags,
}
bch_extent_to_text(buf, sizeof(buf), k);
- pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf);
+ pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf);
for (u = c->uuids; u < c->uuids + c->nr_uuids; u++)
if (!bch_is_zero(u->uuid, 16))
- pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u",
+ pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n",
u - c->uuids, u->uuid, u->label,
u->first_reg, u->last_reg, u->invalidated);
@@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait)
struct bucket *b;
struct closure cl;
- pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu",
+ pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n",
fifo_used(&ca->free[RESERVE_PRIO]),
fifo_used(&ca->free[RESERVE_NONE]),
fifo_used(&ca->free_inc));
@@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket)
if (p->csum !=
bch_crc64(&p->magic, bucket_bytes(ca) - 8)) {
- pr_warn("bad csum reading priorities");
+ pr_warn("bad csum reading priorities\n");
goto out;
}
if (p->magic != pset_magic(&ca->sb)) {
- pr_warn("bad magic reading priorities");
+ pr_warn("bad magic reading priorities\n");
goto out;
}
@@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c,
ret = sysfs_create_link(&d->kobj, &c->kobj, "cache");
if (ret < 0)
- pr_err("Couldn't create device -> cache set symlink");
+ pr_err("Couldn't create device -> cache set symlink\n");
ret = sysfs_create_link(&c->kobj, &d->kobj, d->name);
if (ret < 0)
- pr_err("Couldn't create cache set -> device symlink");
+ pr_err("Couldn't create cache set -> device symlink\n");
clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags);
}
@@ -789,15 +789,17 @@ static void bcache_device_free(struct bcache_device *d)
lockdep_assert_held(&bch_register_lock);
if (disk)
- pr_info("%s stopped", disk->disk_name);
+ pr_info("%s stopped\n", disk->disk_name);
else
- pr_err("bcache device (NULL gendisk) stopped");
+ pr_err("bcache device (NULL gendisk) stopped\n");
if (d->c)
bcache_device_detach(d);
if (disk) {
- if (disk->flags & GENHD_FL_UP)
+ bool disk_added = (disk->flags & GENHD_FL_UP) != 0;
+
+ if (disk_added)
del_gendisk(disk);
if (disk->queue)
@@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d)
ida_simple_remove(&bcache_device_idx,
first_minor_to_idx(disk->first_minor));
- put_disk(disk);
+ if (disk_added)
+ put_disk(disk);
}
bioset_exit(&d->bio_split);
@@ -830,7 +833,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size);
if (!d->nr_stripes || d->nr_stripes > max_stripes) {
- pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)",
+ pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n",
(unsigned int)d->nr_stripes);
return -ENOMEM;
}
@@ -928,11 +931,11 @@ static int cached_dev_status_update(void *arg)
dc->offline_seconds = 0;
if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) {
- pr_err("%s: device offline for %d seconds",
+ pr_err("%s: device offline for %d seconds\n",
dc->backing_dev_name,
BACKING_DEV_OFFLINE_TIMEOUT);
- pr_err("%s: disable I/O request due to backing "
- "device offline", dc->disk.name);
+ pr_err("%s: disable I/O request due to backing device offline\n",
+ dc->disk.name);
dc->io_disable = true;
/* let others know earlier that io_disable is true */
smp_mb();
@@ -959,7 +962,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
};
if (dc->io_disable) {
- pr_err("I/O disabled on cached dev %s",
+ pr_err("I/O disabled on cached dev %s\n",
dc->backing_dev_name);
kfree(env[1]);
kfree(env[2]);
@@ -971,7 +974,7 @@ int bch_cached_dev_run(struct cached_dev *dc)
kfree(env[1]);
kfree(env[2]);
kfree(buf);
- pr_info("cached dev %s is running already",
+ pr_info("cached dev %s is running already\n",
dc->backing_dev_name);
return -EBUSY;
}
@@ -1001,16 +1004,14 @@ int bch_cached_dev_run(struct cached_dev *dc)
if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") ||
sysfs_create_link(&disk_to_dev(d->disk)->kobj,
&d->kobj, "bcache")) {
- pr_err("Couldn't create bcache dev <-> disk sysfs symlinks");
+ pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n");
return -ENOMEM;
}
dc->status_update_thread = kthread_run(cached_dev_status_update,
dc, "bcache_status_update");
if (IS_ERR(dc->status_update_thread)) {
- pr_warn("failed to create bcache_status_update kthread, "
- "continue to run without monitoring backing "
- "device status");
+ pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n");
}
return 0;
@@ -1036,7 +1037,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc)
} while (time_out > 0);
if (time_out == 0)
- pr_warn("give up waiting for dc->writeback_write_update to quit");
+ pr_warn("give up waiting for dc->writeback_write_update to quit\n");
cancel_delayed_work_sync(&dc->writeback_rate_update);
}
@@ -1077,7 +1078,7 @@ static void cached_dev_detach_finish(struct work_struct *w)
mutex_unlock(&bch_register_lock);
- pr_info("Caching disabled for %s", dc->backing_dev_name);
+ pr_info("Caching disabled for %s\n", dc->backing_dev_name);
/* Drop ref we took in cached_dev_detach() */
closure_put(&dc->disk.cl);
@@ -1117,20 +1118,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
return -ENOENT;
if (dc->disk.c) {
- pr_err("Can't attach %s: already attached",
+ pr_err("Can't attach %s: already attached\n",
dc->backing_dev_name);
return -EINVAL;
}
if (test_bit(CACHE_SET_STOPPING, &c->flags)) {
- pr_err("Can't attach %s: shutting down",
+ pr_err("Can't attach %s: shutting down\n",
dc->backing_dev_name);
return -EINVAL;
}
if (dc->sb.block_size < c->sb.block_size) {
/* Will die */
- pr_err("Couldn't attach %s: block size less than set's block size",
+ pr_err("Couldn't attach %s: block size less than set's block size\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1138,7 +1139,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Check whether already attached */
list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) {
if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) {
- pr_err("Tried to attach %s but duplicate UUID already attached",
+ pr_err("Tried to attach %s but duplicate UUID already attached\n",
dc->backing_dev_name);
return -EINVAL;
@@ -1157,14 +1158,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
if (!u) {
if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) {
- pr_err("Couldn't find uuid for %s in set",
+ pr_err("Couldn't find uuid for %s in set\n",
dc->backing_dev_name);
return -ENOENT;
}
u = uuid_find_empty(c);
if (!u) {
- pr_err("Not caching %s, no room for UUID",
+ pr_err("Not caching %s, no room for UUID\n",
dc->backing_dev_name);
return -EINVAL;
}
@@ -1210,7 +1211,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
down_write(&dc->writeback_lock);
if (bch_cached_dev_writeback_start(dc)) {
up_write(&dc->writeback_lock);
- pr_err("Couldn't start writeback facilities for %s",
+ pr_err("Couldn't start writeback facilities for %s\n",
dc->disk.disk->disk_name);
return -ENOMEM;
}
@@ -1233,7 +1234,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
*/
kthread_stop(dc->writeback_thread);
cancel_writeback_rate_update_dwork(dc);
- pr_err("Couldn't run cached device %s",
+ pr_err("Couldn't run cached device %s\n",
dc->backing_dev_name);
return ret;
}
@@ -1244,7 +1245,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
- pr_info("Caching %s as %s on set %pU",
+ pr_info("Caching %s as %s on set %pU\n",
dc->backing_dev_name,
dc->disk.disk->disk_name,
dc->disk.c->sb.set_uuid);
@@ -1384,7 +1385,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
- pr_info("registered backing device %s", dc->backing_dev_name);
+ pr_info("registered backing device %s\n", dc->backing_dev_name);
list_add(&dc->list, &uncached_devices);
/* attach to a matched cache set if it exists */
@@ -1401,7 +1402,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
return 0;
err:
- pr_notice("error %s: %s", dc->backing_dev_name, err);
+ pr_notice("error %s: %s\n", dc->backing_dev_name, err);
bcache_device_stop(&dc->disk);
return ret;
}
@@ -1497,7 +1498,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
u = uuid_find_empty(c);
if (!u) {
- pr_err("Can't create volume, no room for UUID");
+ pr_err("Can't create volume, no room for UUID\n");
return -EINVAL;
}
@@ -1523,7 +1524,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
smp_mb();
pr_err("stop %s: too many IO errors on backing device %s\n",
- dc->disk.disk->disk_name, dc->backing_dev_name);
+ dc->disk.disk->disk_name, dc->backing_dev_name);
bcache_device_stop(&dc->disk);
return true;
@@ -1534,6 +1535,7 @@ bool bch_cached_dev_error(struct cached_dev *dc)
__printf(2, 3)
bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
{
+ struct va_format vaf;
va_list args;
if (c->on_error != ON_ERROR_PANIC &&
@@ -1541,20 +1543,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...)
return false;
if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags))
- pr_info("CACHE_SET_IO_DISABLE already set");
+ pr_info("CACHE_SET_IO_DISABLE already set\n");
/*
* XXX: we can be called from atomic context
* acquire_console_sem();
*/
- pr_err("bcache: error on %pU: ", c->sb.set_uuid);
-
va_start(args, fmt);
- vprintk(fmt, args);
- va_end(args);
- pr_err(", disabling caching\n");
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("error on %pU: %pV, disabling caching\n",
+ c->sb.set_uuid, &vaf);
+
+ va_end(args);
if (c->on_error == ON_ERROR_PANIC)
panic("panic forced after error\n");
@@ -1606,7 +1610,7 @@ static void cache_set_free(struct closure *cl)
list_del(&c->list);
mutex_unlock(&bch_register_lock);
- pr_info("Cache set %pU unregistered", c->sb.set_uuid);
+ pr_info("Cache set %pU unregistered\n", c->sb.set_uuid);
wake_up(&unregister_wait);
closure_debug_destroy(&c->cl);
@@ -1677,7 +1681,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
struct cached_dev *dc)
{
if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) {
- pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.",
+ pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n",
d->disk->disk_name, c->sb.set_uuid);
bcache_device_stop(d);
} else if (atomic_read(&dc->has_dirty)) {
@@ -1685,7 +1689,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 1
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n",
d->disk->disk_name);
/*
* There might be a small time gap that cache set is
@@ -1707,7 +1711,7 @@ static void conditional_stop_bcache_device(struct cache_set *c,
* dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO
* and dc->has_dirty == 0
*/
- pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.",
+ pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n",
d->disk->disk_name);
}
}
@@ -1874,7 +1878,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_read(c, &journal))
goto err;
- pr_debug("btree_journal_read() done");
+ pr_debug("btree_journal_read() done\n");
err = "no journal entries found";
if (list_empty(&journal))
@@ -1920,7 +1924,7 @@ static int run_cache_set(struct cache_set *c)
bch_journal_mark(c, &journal);
bch_initial_gc_finish(c);
- pr_debug("btree_check() done");
+ pr_debug("btree_check() done\n");
/*
* bcache_journal_next() can't happen sooner, or
@@ -1951,7 +1955,7 @@ static int run_cache_set(struct cache_set *c)
if (bch_journal_replay(c, &journal))
goto err;
} else {
- pr_notice("invalidating existing data");
+ pr_notice("invalidating existing data\n");
for_each_cache(ca, c, i) {
unsigned int j;
@@ -2085,7 +2089,7 @@ found:
memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16);
c->sb.flags = ca->sb.flags;
c->sb.seq = ca->sb.seq;
- pr_debug("set version = %llu", c->sb.version);
+ pr_debug("set version = %llu\n", c->sb.version);
}
kobject_get(&ca->kobj);
@@ -2247,7 +2251,7 @@ err_btree_alloc:
err_free:
module_put(THIS_MODULE);
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2301,14 +2305,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk,
goto out;
}
- pr_info("registered cache device %s", ca->cache_dev_name);
+ pr_info("registered cache device %s\n", ca->cache_dev_name);
out:
kobject_put(&ca->kobj);
err:
if (err)
- pr_notice("error %s: %s", ca->cache_dev_name, err);
+ pr_notice("error %s: %s\n", ca->cache_dev_name, err);
return ret;
}
@@ -2323,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
kobj_attribute_write(register, register_bcache);
kobj_attribute_write(register_quiet, register_bcache);
+kobj_attribute_write(register_async, register_bcache);
kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup);
static bool bch_is_open_backing(struct block_device *bdev)
@@ -2358,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev)
return bch_is_open_cache(bdev) || bch_is_open_backing(bdev);
}
+struct async_reg_args {
+ struct work_struct reg_work;
+ char *path;
+ struct cache_sb *sb;
+ struct cache_sb_disk *sb_disk;
+ struct block_device *bdev;
+};
+
+static void register_bdev_worker(struct work_struct *work)
+{
+ int fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cached_dev *dc;
+
+ dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+ if (!dc) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ mutex_lock(&bch_register_lock);
+ if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0)
+ fail = true;
+ mutex_unlock(&bch_register_lock);
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register backing device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_cache_worker(struct work_struct *work)
+{
+ int fail = false;
+ struct async_reg_args *args =
+ container_of(work, struct async_reg_args, reg_work);
+ struct cache *ca;
+
+ ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca) {
+ fail = true;
+ put_page(virt_to_page(args->sb_disk));
+ blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
+ goto out;
+ }
+
+ /* blkdev_put() will be called in bch_cache_release() */
+ if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0)
+ fail = true;
+
+out:
+ if (fail)
+ pr_info("error %s: fail to register cache device\n",
+ args->path);
+ kfree(args->sb);
+ kfree(args->path);
+ kfree(args);
+ module_put(THIS_MODULE);
+}
+
+static void register_device_aync(struct async_reg_args *args)
+{
+ if (SB_IS_BDEV(args->sb))
+ INIT_WORK(&args->reg_work, register_bdev_worker);
+ else
+ INIT_WORK(&args->reg_work, register_cache_worker);
+
+ queue_work(system_wq, &args->reg_work);
+}
+
static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
const char *buffer, size_t size)
{
@@ -2420,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
goto out_blkdev_put;
err = "failed to register device";
+ if (attr == &ksysfs_register_async) {
+ /* register in asynchronous way */
+ struct async_reg_args *args =
+ kzalloc(sizeof(struct async_reg_args), GFP_KERNEL);
+
+ if (!args) {
+ ret = -ENOMEM;
+ err = "cannot allocate memory";
+ goto out_put_sb_page;
+ }
+
+ args->path = path;
+ args->sb = sb;
+ args->sb_disk = sb_disk;
+ args->bdev = bdev;
+ register_device_aync(args);
+ /* No wait and returns to user space */
+ goto async_done;
+ }
+
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
@@ -2447,6 +2549,7 @@ done:
kfree(sb);
kfree(path);
module_put(THIS_MODULE);
+async_done:
return size;
out_put_sb_page:
@@ -2461,7 +2564,7 @@ out_free_path:
out_module_put:
module_put(THIS_MODULE);
out:
- pr_info("error %s: %s", path?path:"", err);
+ pr_info("error %s: %s\n", path?path:"", err);
return ret;
}
@@ -2506,7 +2609,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
mutex_unlock(&bch_register_lock);
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
- pr_info("delete pdev %p", pdev);
+ pr_info("delete pdev %p\n", pdev);
list_del(&pdev->list);
bcache_device_stop(&pdev->dc->disk);
kfree(pdev);
@@ -2549,7 +2652,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
mutex_unlock(&bch_register_lock);
- pr_info("Stopping all devices:");
+ pr_info("Stopping all devices:\n");
/*
* The reason bch_register_lock is not held to call
@@ -2599,9 +2702,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
finish_wait(&unregister_wait, &wait);
if (stopped)
- pr_info("All devices stopped");
+ pr_info("All devices stopped\n");
else
- pr_notice("Timeout waiting for devices to be closed");
+ pr_notice("Timeout waiting for devices to be closed\n");
out:
mutex_unlock(&bch_register_lock);
}
@@ -2637,7 +2740,7 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback_sync == 0)
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC;
else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) {
- pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n",
bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX);
bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX;
}
@@ -2645,13 +2748,13 @@ static void check_module_parameters(void)
if (bch_cutoff_writeback == 0)
bch_cutoff_writeback = CUTOFF_WRITEBACK;
else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) {
- pr_warn("set bch_cutoff_writeback (%u) to max value %u",
+ pr_warn("set bch_cutoff_writeback (%u) to max value %u\n",
bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX);
bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX;
}
if (bch_cutoff_writeback > bch_cutoff_writeback_sync) {
- pr_warn("set bch_cutoff_writeback (%u) to %u",
+ pr_warn("set bch_cutoff_writeback (%u) to %u\n",
bch_cutoff_writeback, bch_cutoff_writeback_sync);
bch_cutoff_writeback = bch_cutoff_writeback_sync;
}
@@ -2662,6 +2765,9 @@ static int __init bcache_init(void)
static const struct attribute *files[] = {
&ksysfs_register.attr,
&ksysfs_register_quiet.attr,
+#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION
+ &ksysfs_register_async.attr,
+#endif
&ksysfs_pendings_cleanup.attr,
NULL
};
diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
index 323276994aab..0dadec5a78f6 100644
--- a/drivers/md/bcache/sysfs.c
+++ b/drivers/md/bcache/sysfs.c
@@ -421,7 +421,7 @@ STORE(__cached_dev)
return size;
}
if (v == -ENOENT)
- pr_err("Can't attach %s: cache set not found", buf);
+ pr_err("Can't attach %s: cache set not found\n", buf);
return v;
}
@@ -455,7 +455,7 @@ STORE(bch_cached_dev)
*/
if (dc->writeback_running) {
dc->writeback_running = false;
- pr_err("%s: failed to run non-existent writeback thread",
+ pr_err("%s: failed to run non-existent writeback thread\n",
dc->disk.disk->disk_name);
}
} else
@@ -872,11 +872,11 @@ STORE(__bch_cache_set)
if (v) {
if (test_and_set_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already set");
+ pr_warn("CACHE_SET_IO_DISABLE already set\n");
} else {
if (!test_and_clear_bit(CACHE_SET_IO_DISABLE,
&c->flags))
- pr_warn("CACHE_SET_IO_DISABLE already cleared");
+ pr_warn("CACHE_SET_IO_DISABLE already cleared\n");
}
}
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 3f7641fb28d5..1cf1e5016cb9 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c,
schedule_timeout_interruptible(
msecs_to_jiffies(INIT_KEYS_SLEEP_MS));
else if (ret < 0) {
- pr_warn("sectors dirty init failed, ret=%d!", ret);
+ pr_warn("sectors dirty init failed, ret=%d!\n", ret);
break;
}
} while (ret == -EAGAIN);
@@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL);
if (!state) {
- pr_warn("sectors dirty init failed: cannot allocate memory");
+ pr_warn("sectors dirty init failed: cannot allocate memory\n");
return;
}
@@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d)
&state->infos[i],
name);
if (IS_ERR(state->infos[i].thread)) {
- pr_err("fails to run thread bch_dirty_init[%d]", i);
+ pr_err("fails to run thread bch_dirty_init[%d]\n", i);
for (--i; i >= 0; i--)
kthread_stop(state->infos[i].thread);
goto out;
diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h
index 8381d651d4ed..24e97db50ebb 100644
--- a/drivers/md/md-linear.h
+++ b/drivers/md/md-linear.h
@@ -12,6 +12,6 @@ struct linear_conf
struct rcu_head rcu;
sector_t array_sectors;
int raid_disks; /* a copy of mddev->raid_disks */
- struct dev_info disks[0];
+ struct dev_info disks[];
};
#endif
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 271e8a587354..f567f536b529 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -89,6 +89,7 @@ static struct module *md_cluster_mod;
static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
static struct workqueue_struct *md_wq;
static struct workqueue_struct *md_misc_wq;
+static struct workqueue_struct *md_rdev_misc_wq;
static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
@@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev,
goto abort;
if (mddev->serial_info_pool == NULL) {
- unsigned int noio_flag;
-
- noio_flag = memalloc_noio_save();
+ /*
+ * already in memalloc noio context by
+ * mddev_suspend()
+ */
mddev->serial_info_pool =
mempool_create_kmalloc_pool(NR_SERIAL_INFOS,
sizeof(struct serial_info));
- memalloc_noio_restore(noio_flag);
if (!mddev->serial_info_pool) {
rdevs_uninit_serial(mddev);
pr_err("can't alloc memory pool for serialization\n");
@@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
const int sgrp = op_stat_group(bio_op(bio));
- struct mddev *mddev = q->queuedata;
+ struct mddev *mddev = bio->bi_disk->private_data;
unsigned int sectors;
if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) {
@@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev)
wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags));
del_timer_sync(&mddev->safemode_timer);
+ /* restrict memory reclaim I/O during raid array is suspend */
+ mddev->noio_flag = memalloc_noio_save();
}
EXPORT_SYMBOL_GPL(mddev_suspend);
void mddev_resume(struct mddev *mddev)
{
+ /* entred the memalloc scope from mddev_suspend() */
+ memalloc_noio_restore(mddev->noio_flag);
lockdep_assert_held(&mddev->reconfig_mutex);
if (--mddev->suspended)
return;
@@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev)
return err;
}
-static void md_delayed_delete(struct work_struct *ws)
+static void rdev_delayed_delete(struct work_struct *ws)
{
struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
kobject_del(&rdev->kobj);
@@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev)
* to delay it due to rcu usage.
*/
synchronize_rcu();
- INIT_WORK(&rdev->del_work, md_delayed_delete);
+ INIT_WORK(&rdev->del_work, rdev_delayed_delete);
kobject_get(&rdev->kobj);
- queue_work(md_misc_wq, &rdev->del_work);
+ queue_work(md_rdev_misc_wq, &rdev->del_work);
}
/*
@@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len)
rdev->saved_raid_disk = -1;
clear_bit(In_sync, &rdev->flags);
clear_bit(Bitmap_sync, &rdev->flags);
- err = rdev->mddev->pers->
- hot_add_disk(rdev->mddev, rdev);
+ err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev);
if (err) {
rdev->raid_disk = -1;
return err;
@@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page)
return -EINVAL;
}
+/* need to ensure rdev_delayed_delete() has completed */
+static void flush_rdev_wq(struct mddev *mddev)
+{
+ struct md_rdev *rdev;
+
+ rcu_read_lock();
+ rdev_for_each_rcu(rdev, mddev)
+ if (work_pending(&rdev->del_work)) {
+ flush_workqueue(md_rdev_misc_wq);
+ break;
+ }
+ rcu_read_unlock();
+}
+
static ssize_t
new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
@@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len)
minor != MINOR(dev))
return -EOVERFLOW;
- flush_workqueue(md_misc_wq);
-
+ flush_rdev_wq(mddev);
err = mddev_lock(mddev);
if (err)
return err;
@@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len)
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) &&
mddev_lock(mddev) == 0) {
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name)
mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE);
if (!mddev->queue)
goto abort;
- mddev->queue->queuedata = mddev;
blk_set_stacking_limits(&mddev->queue->limits);
@@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
@@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev)
md_bitmap_destroy(mddev);
mddev_detach(mddev);
/* Ensure ->event_work is done */
- flush_workqueue(md_misc_wq);
+ if (mddev->event_work.func)
+ flush_workqueue(md_misc_wq);
spin_lock(&mddev->lock);
mddev->pers = NULL;
spin_unlock(&mddev->lock);
@@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
}
- if (cmd == ADD_NEW_DISK)
- /* need to ensure md_delayed_delete() has completed */
- flush_workqueue(md_misc_wq);
+ if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK)
+ flush_rdev_wq(mddev);
if (cmd == HOT_REMOVE_DISK)
/* need to ensure recovery thread has run */
@@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode)
*/
mddev_put(mddev);
/* Wait until bdev->bd_disk is definitely gone */
- flush_workqueue(md_misc_wq);
+ if (work_pending(&mddev->del_work))
+ flush_workqueue(md_misc_wq);
/* Then retry the open from the top */
return -ERESTARTSYS;
}
@@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev,
rdev->recovery_offset = 0;
}
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
+ if (mddev->pers->hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
if (!test_bit(Journal, &rdev->flags))
@@ -9469,6 +9487,10 @@ static int __init md_init(void)
if (!md_misc_wq)
goto err_misc_wq;
+ md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0);
+ if (!md_misc_wq)
+ goto err_rdev_misc_wq;
+
if ((ret = register_blkdev(MD_MAJOR, "md")) < 0)
goto err_md;
@@ -9490,6 +9512,8 @@ static int __init md_init(void)
err_mdp:
unregister_blkdev(MD_MAJOR, "md");
err_md:
+ destroy_workqueue(md_rdev_misc_wq);
+err_rdev_misc_wq:
destroy_workqueue(md_misc_wq);
err_misc_wq:
destroy_workqueue(md_wq);
@@ -9776,6 +9800,7 @@ static __exit void md_exit(void)
* destroy_workqueue() below will wait for that to complete.
*/
}
+ destroy_workqueue(md_rdev_misc_wq);
destroy_workqueue(md_misc_wq);
destroy_workqueue(md_wq);
}
@@ -9785,7 +9810,7 @@ module_exit(md_exit)
static int get_ro(char *buffer, const struct kernel_param *kp)
{
- return sprintf(buffer, "%d", start_readonly);
+ return sprintf(buffer, "%d\n", start_readonly);
}
static int set_ro(const char *val, const struct kernel_param *kp)
{
diff --git a/drivers/md/md.h b/drivers/md/md.h
index acd681939112..612814d07d35 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -497,6 +497,7 @@ struct mddev {
void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
struct md_cluster_info *cluster_info;
unsigned int good_device_nr; /* good device num within cluster raid */
+ unsigned int noio_flag; /* for memalloc scope API */
bool has_superblocks:1;
bool fail_last_dev:1;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index cd810e195086..dcd27f3da84e 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio)
static void call_bio_endio(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
- struct r1conf *conf = r1_bio->mddev->private;
if (!test_bit(R1BIO_Uptodate, &r1_bio->state))
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
- /*
- * Wake up any possible resync thread that waits for the device
- * to go idle.
- */
- allow_barrier(conf, r1_bio->sector);
}
static void raid_end_bio_io(struct r1bio *r1_bio)
{
struct bio *bio = r1_bio->master_bio;
+ struct r1conf *conf = r1_bio->mddev->private;
/* if nobody has done the final endio yet, do it now */
if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) {
@@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio)
call_bio_endio(r1_bio);
}
+ /*
+ * Wake up any possible resync thread that waits for the device
+ * to go idle. All I/Os, even write-behind writes, are done.
+ */
+ allow_barrier(conf, r1_bio->sector);
+
free_r1bio(r1_bio);
}
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index e7ccad898736..b7eb09e8c025 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -180,7 +180,7 @@ struct r1bio {
* if the IO is in WRITE direction, then multiple bios are used.
* We choose the number when they are allocated.
*/
- struct bio *bios[0];
+ struct bio *bios[];
/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
};
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index d3eaaf3eb1bc..79cd2b7d3128 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -153,7 +153,7 @@ struct r10bio {
};
sector_t addr;
int devnum;
- } devs[0];
+ } devs[];
};
/* bits for r10bio.state */
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ba00e9877f02..ab8067f9ce8c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num)
}
/**
- * scribble_len - return the required size of the scribble region
+ * scribble_alloc - allocate percpu scribble buffer for required size
+ * of the scribble region
+ * @percpu - from for_each_present_cpu() of the caller
* @num - total number of disks in the array
+ * @cnt - scribble objs count for required size of the scribble region
*
- * The size must be enough to contain:
+ * The scribble buffer size must be enough to contain:
* 1/ a struct page pointer for each device in the array +2
* 2/ room to convert each entry in (1) to its corresponding dma
* (dma_map_page()) or page (page_address()) address.
@@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num)
* of the P and Q blocks.
*/
static int scribble_alloc(struct raid5_percpu *percpu,
- int num, int cnt, gfp_t flags)
+ int num, int cnt)
{
size_t obj_size =
sizeof(struct page *) * (num+2) +
sizeof(addr_conv_t) * (num+2);
void *scribble;
- scribble = kvmalloc_array(cnt, obj_size, flags);
+ /*
+ * If here is in raid array suspend context, it is in memalloc noio
+ * context as well, there is no potential recursive memory reclaim
+ * I/Os with the GFP_KERNEL flag.
+ */
+ scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL);
if (!scribble)
return -ENOMEM;
@@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors)
percpu = per_cpu_ptr(conf->percpu, cpu);
err = scribble_alloc(percpu, new_disks,
- new_sectors / STRIPE_SECTORS,
- GFP_NOIO);
+ new_sectors / STRIPE_SECTORS);
if (err)
break;
}
@@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu
conf->previous_raid_disks),
max(conf->chunk_sectors,
conf->prev_chunk_sectors)
- / STRIPE_SECTORS,
- GFP_KERNEL)) {
+ / STRIPE_SECTORS)) {
free_scratch_buffer(conf, percpu);
return -ENOMEM;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index cbf171636766..0585efa47d8f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -19,7 +19,6 @@
#include <linux/pr.h>
#include <linux/ptrace.h>
#include <linux/nvme_ioctl.h>
-#include <linux/t10-pi.h>
#include <linux/pm_qos.h>
#include <asm/unaligned.h>
@@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl)
nvme_put_ctrl(ctrl);
}
-static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
-{
- return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
-}
-
static blk_status_t nvme_error_status(u16 status)
{
switch (status & 0x7ff) {
@@ -433,7 +427,6 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
- list_del_init(&head->entry);
cleanup_srcu_struct(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
@@ -530,7 +523,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl,
c.directive.opcode = nvme_admin_directive_recv;
c.directive.nsid = cpu_to_le32(nsid);
- c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1);
+ c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s)));
c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM;
c.directive.dtype = NVME_DIR_STREAMS;
@@ -553,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl)
ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL);
if (ret)
- return ret;
+ goto out_disable_stream;
ctrl->nssa = le16_to_cpu(s.nssa);
if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) {
dev_info(ctrl->device, "too few streams (%u) available\n",
ctrl->nssa);
- nvme_disable_streams(ctrl);
- return 0;
+ goto out_disable_stream;
}
ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1);
dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams);
return 0;
+
+out_disable_stream:
+ nvme_disable_streams(ctrl);
+ return ret;
}
/*
@@ -1027,6 +1023,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
+/*
+ * In NVMe 1.0 the CNS field was just a binary controller or namespace
+ * flag, thus sending any new CNS opcodes has a big chance of not working.
+ * Qemu unfortunately had that bug after reporting a 1.1 version compliance
+ * (but not for any later version).
+ */
+static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
+{
+ if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
+ return ctrl->vs < NVME_VS(1, 2, 0);
+ return ctrl->vs < NVME_VS(1, 1, 0);
+}
+
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
{
struct nvme_command c = { };
@@ -1290,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
meta_len = (io.nblocks + 1) * ns->ms;
metadata = nvme_to_user_ptr(io.metadata);
- if (ns->ext) {
+ if (ns->features & NVME_NS_EXT_LBAS) {
length += meta_len;
meta_len = 0;
} else if (meta_len) {
@@ -1392,8 +1401,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
}
if (effects & NVME_CMD_EFFECTS_CCC)
nvme_init_identify(ctrl);
- if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
+ if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
nvme_queue_scan(ctrl);
+ flush_work(&ctrl->scan_work);
+ }
}
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
@@ -1682,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
struct blk_integrity integrity;
@@ -1705,20 +1717,15 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
}
integrity.tuple_size = ms;
blk_integrity_register(disk, &integrity);
- blk_queue_max_integrity_segments(disk->queue, 1);
+ blk_queue_max_integrity_segments(disk->queue, max_integrity_segments);
}
#else
-static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
+static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type,
+ u32 max_integrity_segments)
{
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_set_chunk_size(struct nvme_ns *ns)
-{
- u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
- blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
-}
-
static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
{
struct nvme_ctrl *ctrl = ns->ctrl;
@@ -1804,12 +1811,37 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
}
+static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ u32 *phys_bs, u32 *io_opt)
+{
+ struct streams_directive_params s;
+ int ret;
+
+ if (!ctrl->nr_streams)
+ return 0;
+
+ ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
+ if (ret)
+ return ret;
+
+ ns->sws = le32_to_cpu(s.sws);
+ ns->sgs = le16_to_cpu(s.sgs);
+
+ if (ns->sws) {
+ *phys_bs = ns->sws * (1 << ns->lba_shift);
+ if (ns->sgs)
+ *io_opt = *phys_bs * ns->sgs;
+ }
+
+ return 0;
+}
+
static void nvme_update_disk_info(struct gendisk *disk,
struct nvme_ns *ns, struct nvme_id_ns *id)
{
sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
unsigned short bs = 1 << ns->lba_shift;
- u32 atomic_bs, phys_bs, io_opt;
+ u32 atomic_bs, phys_bs, io_opt = 0;
if (ns->lba_shift > PAGE_SHIFT) {
/* unsupported block size, set capacity to 0 later */
@@ -1818,26 +1850,25 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_freeze_queue(disk->queue);
blk_integrity_unregister(disk);
+ atomic_bs = phys_bs = bs;
+ nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt);
if (id->nabo == 0) {
/*
* Bit 1 indicates whether NAWUPF is defined for this namespace
* and whether it should be used instead of AWUPF. If NAWUPF ==
* 0 then AWUPF must be used instead.
*/
- if (id->nsfeat & (1 << 1) && id->nawupf)
+ if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf)
atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs;
else
atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs;
- } else {
- atomic_bs = bs;
}
- phys_bs = bs;
- io_opt = bs;
- if (id->nsfeat & (1 << 4)) {
+
+ if (id->nsfeat & NVME_NS_FEAT_IO_OPT) {
/* NPWG = Namespace Preferred Write Granularity */
- phys_bs *= 1 + le16_to_cpu(id->npwg);
+ phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
- io_opt *= 1 + le16_to_cpu(id->nows);
+ io_opt = bs * (1 + le16_to_cpu(id->nows));
}
blk_queue_logical_block_size(disk->queue, bs);
@@ -1850,19 +1881,34 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_queue_io_min(disk->queue, phys_bs);
blk_queue_io_opt(disk->queue, io_opt);
- if (ns->ms && !ns->ext &&
- (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
- nvme_init_integrity(disk, ns->ms, ns->pi_type);
- if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
- ns->lba_shift > PAGE_SHIFT)
+ /*
+ * The block layer can't support LBA sizes larger than the page size
+ * yet, so catch this early and don't allow block I/O.
+ */
+ if (ns->lba_shift > PAGE_SHIFT)
capacity = 0;
+ /*
+ * Register a metadata profile for PI, or the plain non-integrity NVMe
+ * metadata masquerading as Type 0 if supported, otherwise reject block
+ * I/O to namespaces with metadata except when the namespace supports
+ * PI, as it can strip/insert in that case.
+ */
+ if (ns->ms) {
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ (ns->features & NVME_NS_METADATA_SUPPORTED))
+ nvme_init_integrity(disk, ns->ms, ns->pi_type,
+ ns->ctrl->max_integrity_segments);
+ else if (!nvme_ns_has_pi(ns))
+ capacity = 0;
+ }
+
set_capacity_revalidate_and_notify(disk, capacity, false);
nvme_config_discard(disk, ns);
nvme_config_write_zeroes(disk, ns);
- if (id->nsattr & (1 << 0))
+ if (id->nsattr & NVME_NS_ATTR_RO)
set_disk_ro(disk, true);
else
set_disk_ro(disk, false);
@@ -1870,9 +1916,11 @@ static void nvme_update_disk_info(struct gendisk *disk,
blk_mq_unfreeze_queue(disk->queue);
}
-static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
+static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
{
struct nvme_ns *ns = disk->private_data;
+ struct nvme_ctrl *ctrl = ns->ctrl;
+ u32 iob;
/*
* If identify namespace failed, use default 512 byte block size so
@@ -1881,32 +1929,55 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds;
if (ns->lba_shift == 0)
ns->lba_shift = 9;
- ns->noiob = le16_to_cpu(id->noiob);
+
+ if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
+ is_power_of_2(ctrl->max_hw_sectors))
+ iob = ctrl->max_hw_sectors;
+ else
+ iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob));
+
+ ns->features = 0;
ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms);
- ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT);
/* the PI implementation requires metadata equal t10 pi tuple size */
if (ns->ms == sizeof(struct t10_pi_tuple))
ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
else
ns->pi_type = 0;
- if (ns->noiob)
- nvme_set_chunk_size(ns);
+ if (ns->ms) {
+ /*
+ * For PCIe only the separate metadata pointer is supported,
+ * as the block layer supplies metadata in a separate bio_vec
+ * chain. For Fabrics, only metadata as part of extended data
+ * LBA is supported on the wire per the Fabrics specification,
+ * but the HBA/HCA will do the remapping from the separate
+ * metadata buffers for us.
+ */
+ if (id->flbas & NVME_NS_FLBAS_META_EXT) {
+ ns->features |= NVME_NS_EXT_LBAS;
+ if ((ctrl->ops->flags & NVME_F_FABRICS) &&
+ (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) &&
+ ctrl->max_integrity_segments)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ } else {
+ if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS))
+ return -EINVAL;
+ if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)
+ ns->features |= NVME_NS_METADATA_SUPPORTED;
+ }
+ }
+
+ if (iob)
+ blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob));
nvme_update_disk_info(disk, ns, id);
#ifdef CONFIG_NVME_MULTIPATH
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
- if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
- struct backing_dev_info *info =
- ns->head->disk->queue->backing_dev_info;
-
- info->capabilities |= BDI_CAP_STABLE_WRITES;
- }
-
revalidate_disk(ns->head->disk);
}
#endif
+ return 0;
}
static int nvme_revalidate_disk(struct gendisk *disk)
@@ -1931,7 +2002,6 @@ static int nvme_revalidate_disk(struct gendisk *disk)
goto free_id;
}
- __nvme_revalidate_disk(disk, id);
ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids);
if (ret)
goto free_id;
@@ -1940,8 +2010,10 @@ static int nvme_revalidate_disk(struct gendisk *disk)
dev_err(ctrl->device,
"identifiers changed for nsid %d\n", ns->head->ns_id);
ret = -ENODEV;
+ goto free_id;
}
+ ret = __nvme_revalidate_disk(disk, id);
free_id:
kfree(id);
out:
@@ -2249,10 +2321,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) &&
- is_power_of_2(ctrl->max_hw_sectors))
- blk_queue_chunk_sectors(q, ctrl->max_hw_sectors);
blk_queue_virt_boundary(q, ctrl->page_size - 1);
+ blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
blk_queue_write_cache(q, vwc, vwc);
@@ -2655,7 +2725,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys,
return false;
}
- if ((id->cmic & (1 << 1)) ||
+ if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
(ctrl->opts && ctrl->opts->discovery_nqn))
continue;
@@ -2746,7 +2816,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
void *log, size_t size, u64 offset)
{
struct nvme_command c = { };
- unsigned long dwlen = size / 4 - 1;
+ u32 dwlen = nvme_bytes_to_numd(size);
c.get_log_page.opcode = nvme_admin_get_log_page;
c.get_log_page.nsid = cpu_to_le32(nsid);
@@ -3401,7 +3471,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
list_for_each_entry(h, &subsys->nsheads, entry) {
if (nvme_ns_ids_valid(&new->ids) &&
- !list_empty(&h->list) &&
nvme_ns_ids_equal(&new->ids, &h->ids))
return -EINVAL;
}
@@ -3410,8 +3479,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys,
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_id_ns *id,
- struct nvme_ns_ids *ids)
+ unsigned nsid, struct nvme_ns_ids *ids)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
@@ -3469,42 +3537,51 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid,
struct nvme_id_ns *id)
{
struct nvme_ctrl *ctrl = ns->ctrl;
- bool is_shared = id->nmic & (1 << 0);
+ bool is_shared = id->nmic & NVME_NS_NMIC_SHARED;
struct nvme_ns_head *head = NULL;
struct nvme_ns_ids ids;
int ret = 0;
ret = nvme_report_ns_ids(ctrl, nsid, id, &ids);
- if (ret)
- goto out;
+ if (ret) {
+ if (ret < 0)
+ return ret;
+ return blk_status_to_errno(nvme_error_status(ret));
+ }
mutex_lock(&ctrl->subsys->lock);
- if (is_shared)
- head = nvme_find_ns_head(ctrl->subsys, nsid);
+ head = nvme_find_ns_head(ctrl->subsys, nsid);
if (!head) {
- head = nvme_alloc_ns_head(ctrl, nsid, id, &ids);
+ head = nvme_alloc_ns_head(ctrl, nsid, &ids);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
+ head->shared = is_shared;
} else {
+ ret = -EINVAL;
+ if (!is_shared || !head->shared) {
+ dev_err(ctrl->device,
+ "Duplicate unshared namespace %d\n", nsid);
+ goto out_put_ns_head;
+ }
if (!nvme_ns_ids_equal(&head->ids, &ids)) {
dev_err(ctrl->device,
"IDs don't match for shared namespace %d\n",
nsid);
- ret = -EINVAL;
- goto out_unlock;
+ goto out_put_ns_head;
}
}
list_add_tail(&ns->siblings, &head->list);
ns->head = head;
+ mutex_unlock(&ctrl->subsys->lock);
+ return 0;
+out_put_ns_head:
+ nvme_put_ns_head(head);
out_unlock:
mutex_unlock(&ctrl->subsys->lock);
-out:
- if (ret > 0)
- ret = blk_status_to_errno(nvme_error_status(ret));
return ret;
}
@@ -3535,32 +3612,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
return ret;
}
-static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns)
-{
- struct streams_directive_params s;
- int ret;
-
- if (!ctrl->nr_streams)
- return 0;
-
- ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id);
- if (ret)
- return ret;
-
- ns->sws = le32_to_cpu(s.sws);
- ns->sgs = le16_to_cpu(s.sgs);
-
- if (ns->sws) {
- unsigned int bs = 1 << ns->lba_shift;
-
- blk_queue_io_min(ns->queue, bs * ns->sws);
- if (ns->sgs)
- blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs);
- }
-
- return 0;
-}
-
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -3604,7 +3655,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
ret = nvme_init_ns_head(ns, nsid, id);
if (ret)
goto out_free_id;
- nvme_setup_streams_ns(ctrl, ns);
nvme_set_disk_name(disk_name, ns, ctrl, &flags);
disk = alloc_disk_node(0, node);
@@ -3618,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
memcpy(disk->disk_name, disk_name, DISK_NAME_LEN);
ns->disk = disk;
- __nvme_revalidate_disk(disk, id);
+ if (__nvme_revalidate_disk(disk, id))
+ goto out_free_disk;
if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
ret = nvme_nvm_register(ns, disk_name, node);
@@ -3645,9 +3696,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
/* prevent double queue cleanup */
ns->disk->queue = NULL;
put_disk(ns->disk);
+ out_free_disk:
+ del_gendisk(ns->disk);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
+ if (list_empty(&ns->head->list))
+ list_del_init(&ns->head->entry);
mutex_unlock(&ctrl->subsys->lock);
nvme_put_ns_head(ns->head);
out_free_id:
@@ -3667,7 +3722,10 @@ static void nvme_ns_remove(struct nvme_ns *ns)
mutex_lock(&ns->ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
+ if (list_empty(&ns->head->list))
+ list_del_init(&ns->head->entry);
mutex_unlock(&ns->ctrl->subsys->lock);
+
synchronize_rcu(); /* guarantee not available in head->list */
nvme_mpath_clear_current_path(ns);
synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
@@ -3687,6 +3745,16 @@ static void nvme_ns_remove(struct nvme_ns *ns)
nvme_put_ns(ns);
}
+static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
+{
+ struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid);
+
+ if (ns) {
+ nvme_ns_remove(ns);
+ nvme_put_ns(ns);
+ }
+}
+
static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns;
@@ -3718,39 +3786,34 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
}
-static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
+static int nvme_scan_ns_list(struct nvme_ctrl *ctrl)
{
- struct nvme_ns *ns;
+ const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32);
__le32 *ns_list;
- unsigned i, j, nsid, prev = 0;
- unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
- int ret = 0;
+ u32 prev = 0;
+ int ret = 0, i;
+
+ if (nvme_ctrl_limited_cns(ctrl))
+ return -EOPNOTSUPP;
ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
if (!ns_list)
return -ENOMEM;
- for (i = 0; i < num_lists; i++) {
+ for (;;) {
ret = nvme_identify_ns_list(ctrl, prev, ns_list);
if (ret)
goto free;
- for (j = 0; j < min(nn, 1024U); j++) {
- nsid = le32_to_cpu(ns_list[j]);
- if (!nsid)
- goto out;
+ for (i = 0; i < nr_entries; i++) {
+ u32 nsid = le32_to_cpu(ns_list[i]);
+ if (!nsid) /* end of the list? */
+ goto out;
nvme_validate_ns(ctrl, nsid);
-
- while (++prev < nsid) {
- ns = nvme_find_get_ns(ctrl, prev);
- if (ns) {
- nvme_ns_remove(ns);
- nvme_put_ns(ns);
- }
- }
+ while (++prev < nsid)
+ nvme_ns_remove_by_nsid(ctrl, prev);
}
- nn -= j;
}
out:
nvme_remove_invalid_namespaces(ctrl, prev);
@@ -3759,9 +3822,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
return ret;
}
-static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
+static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl)
{
- unsigned i;
+ struct nvme_id_ctrl *id;
+ u32 nn, i;
+
+ if (nvme_identify_ctrl(ctrl, &id))
+ return;
+ nn = le32_to_cpu(id->nn);
+ kfree(id);
for (i = 1; i <= nn; i++)
nvme_validate_ns(ctrl, i);
@@ -3798,8 +3867,6 @@ static void nvme_scan_work(struct work_struct *work)
{
struct nvme_ctrl *ctrl =
container_of(work, struct nvme_ctrl, scan_work);
- struct nvme_id_ctrl *id;
- unsigned nn;
/* No tagset on a live ctrl means IO queues could not created */
if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
@@ -3810,20 +3877,11 @@ static void nvme_scan_work(struct work_struct *work)
nvme_clear_changed_ns_log(ctrl);
}
- if (nvme_identify_ctrl(ctrl, &id))
- return;
-
mutex_lock(&ctrl->scan_lock);
- nn = le32_to_cpu(id->nn);
- if (ctrl->vs >= NVME_VS(1, 1, 0) &&
- !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) {
- if (!nvme_scan_ns_list(ctrl, nn))
- goto out_free_id;
- }
- nvme_scan_ns_sequential(ctrl, nn);
-out_free_id:
+ if (nvme_scan_ns_list(ctrl) != 0)
+ nvme_scan_ns_sequential(ctrl);
mutex_unlock(&ctrl->scan_lock);
- kfree(id);
+
down_write(&ctrl->namespaces_rwsem);
list_sort(NULL, &ctrl->namespaces, ns_cmp);
up_write(&ctrl->namespaces_rwsem);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7dfc4a2ecf1e..cb0007592c12 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -14,6 +14,7 @@
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
+#include "fc.h"
#include <scsi/scsi_transport_fc.h>
/* *************************** Data Structures/Defines ****************** */
@@ -61,6 +62,17 @@ struct nvmefc_ls_req_op {
bool req_queued;
};
+struct nvmefc_ls_rcv_op {
+ struct nvme_fc_rport *rport;
+ struct nvmefc_ls_rsp *lsrsp;
+ union nvmefc_ls_requests *rqstbuf;
+ union nvmefc_ls_responses *rspbuf;
+ u16 rqstdatalen;
+ bool handled;
+ dma_addr_t rspdma;
+ struct list_head lsrcv_list; /* rport->ls_rcv_list */
+} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
+
enum nvme_fcpop_state {
FCPOP_STATE_UNINIT = 0,
FCPOP_STATE_IDLE = 1,
@@ -96,7 +108,7 @@ struct nvme_fc_fcp_op {
struct nvme_fcp_op_w_sgl {
struct nvme_fc_fcp_op op;
struct scatterlist sgl[NVME_INLINE_SG_CNT];
- uint8_t priv[0];
+ uint8_t priv[];
};
struct nvme_fc_lport {
@@ -117,6 +129,7 @@ struct nvme_fc_rport {
struct list_head endp_list; /* for lport->endp_list */
struct list_head ctrl_list;
struct list_head ls_req_list;
+ struct list_head ls_rcv_list;
struct list_head disc_list;
struct device *dev; /* physical device for dma */
struct nvme_fc_lport *lport;
@@ -124,11 +137,12 @@ struct nvme_fc_rport {
struct kref ref;
atomic_t act_ctrl_cnt;
unsigned long dev_loss_end;
+ struct work_struct lsrcv_work;
} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */
-enum nvme_fcctrl_flags {
- FCCTRL_TERMIO = (1 << 0),
-};
+/* fc_ctrl flags values - specified as bit positions */
+#define ASSOC_ACTIVE 0
+#define FCCTRL_TERMIO 1
struct nvme_fc_ctrl {
spinlock_t lock;
@@ -139,9 +153,9 @@ struct nvme_fc_ctrl {
u32 cnum;
bool ioq_live;
- bool assoc_active;
atomic_t err_work_active;
u64 association_id;
+ struct nvmefc_ls_rcv_op *rcv_disconn;
struct list_head ctrl_list; /* rport->ctrl_list */
@@ -152,7 +166,7 @@ struct nvme_fc_ctrl {
struct work_struct err_work;
struct kref ref;
- u32 flags;
+ unsigned long flags;
u32 iocnt;
wait_queue_head_t ioabort_wait;
@@ -219,6 +233,9 @@ static struct device *fc_udev_device;
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
struct nvme_fc_queue *, unsigned int);
+static void nvme_fc_handle_ls_rqst_work(struct work_struct *work);
+
+
static void
nvme_fc_free_lport(struct kref *ref)
{
@@ -394,7 +411,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
newrec->ops = template;
newrec->dev = dev;
ida_init(&newrec->endp_cnt);
- newrec->localport.private = &newrec[1];
+ if (template->local_priv_sz)
+ newrec->localport.private = &newrec[1];
+ else
+ newrec->localport.private = NULL;
newrec->localport.node_name = pinfo->node_name;
newrec->localport.port_name = pinfo->port_name;
newrec->localport.port_role = pinfo->port_role;
@@ -701,9 +721,13 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
atomic_set(&newrec->act_ctrl_cnt, 0);
spin_lock_init(&newrec->lock);
newrec->remoteport.localport = &lport->localport;
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
newrec->dev = lport->dev;
newrec->lport = lport;
- newrec->remoteport.private = &newrec[1];
+ if (lport->ops->remote_priv_sz)
+ newrec->remoteport.private = &newrec[1];
+ else
+ newrec->remoteport.private = NULL;
newrec->remoteport.port_role = pinfo->port_role;
newrec->remoteport.node_name = pinfo->node_name;
newrec->remoteport.port_name = pinfo->port_name;
@@ -711,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
newrec->remoteport.port_num = idx;
__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
+ INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work);
spin_lock_irqsave(&nvme_fc_lock, flags);
list_add_tail(&newrec->endp_list, &lport->endp_list);
@@ -1000,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);
+static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
@@ -1140,41 +1166,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
return __nvme_fc_send_ls_req(rport, lsop, done);
}
-/* Validation Error indexes into the string table below */
-enum {
- VERR_NO_ERROR = 0,
- VERR_LSACC = 1,
- VERR_LSDESC_RQST = 2,
- VERR_LSDESC_RQST_LEN = 3,
- VERR_ASSOC_ID = 4,
- VERR_ASSOC_ID_LEN = 5,
- VERR_CONN_ID = 6,
- VERR_CONN_ID_LEN = 7,
- VERR_CR_ASSOC = 8,
- VERR_CR_ASSOC_ACC_LEN = 9,
- VERR_CR_CONN = 10,
- VERR_CR_CONN_ACC_LEN = 11,
- VERR_DISCONN = 12,
- VERR_DISCONN_ACC_LEN = 13,
-};
-
-static char *validation_errors[] = {
- "OK",
- "Not LS_ACC",
- "Not LSDESC_RQST",
- "Bad LSDESC_RQST Length",
- "Not Association ID",
- "Bad Association ID Length",
- "Not Connection ID",
- "Bad Connection ID Length",
- "Not CR_ASSOC Rqst",
- "Bad CR_ASSOC ACC Length",
- "Not CR_CONN Rqst",
- "Bad CR_CONN ACC Length",
- "Not Disconnect Rqst",
- "Bad Disconnect ACC Length",
-};
-
static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
@@ -1183,21 +1174,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
struct nvmefc_ls_req *lsreq;
struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
struct fcnvme_ls_cr_assoc_acc *assoc_acc;
+ unsigned long flags;
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
+ sizeof(*assoc_rqst) + sizeof(*assoc_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Create Association failed: ENOMEM\n",
+ ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
- lsreq = &lsop->ls_req;
- lsreq->private = (void *)&lsop[1];
- assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
+ assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1];
assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = &assoc_acc[1];
+ else
+ lsreq->private = NULL;
assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
assoc_rqst->desc_list_len =
@@ -1267,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
"q %d Create Association LS failed: %s\n",
queue->qnum, validation_errors[fcret]);
} else {
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id =
be64_to_cpu(assoc_acc->associd.association_id);
queue->connection_id =
be64_to_cpu(assoc_acc->connectid.connection_id);
set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
+ spin_unlock_irqrestore(&ctrl->lock, flags);
}
out_free_buffer:
@@ -1295,18 +1294,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
int ret, fcret = 0;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
+ sizeof(*conn_rqst) + sizeof(*conn_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Create Connection failed: ENOMEM\n",
+ ctrl->cnum);
ret = -ENOMEM;
goto out_no_memory;
}
- lsreq = &lsop->ls_req;
- lsreq->private = (void *)&lsop[1];
- conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
+ conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1];
conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&conn_acc[1];
+ else
+ lsreq->private = NULL;
conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
conn_rqst->desc_list_len = cpu_to_be32(
@@ -1420,54 +1424,385 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
int ret;
lsop = kzalloc((sizeof(*lsop) +
- ctrl->lport->ops->lsrqst_priv_sz +
- sizeof(*discon_rqst) + sizeof(*discon_acc)),
- GFP_KERNEL);
- if (!lsop)
- /* couldn't sent it... too bad */
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
+ ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL);
+ if (!lsop) {
+ dev_info(ctrl->ctrl.device,
+ "NVME-FC{%d}: send Disconnect Association "
+ "failed: ENOMEM\n",
+ ctrl->cnum);
return;
+ }
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
lsreq = &lsop->ls_req;
+ if (ctrl->lport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&discon_acc[1];
+ else
+ lsreq->private = NULL;
- lsreq->private = (void *)&lsop[1];
- discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)
- (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
- discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
+ ctrl->association_id);
- discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
- discon_rqst->desc_list_len = cpu_to_be32(
- sizeof(struct fcnvme_lsdesc_assoc_id) +
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
+ ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
+ nvme_fc_disconnect_assoc_done);
+ if (ret)
+ kfree(lsop);
+}
- discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
- discon_rqst->associd.desc_len =
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_assoc_id));
+static void
+nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
+{
+ struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private;
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct nvme_fc_lport *lport = rport->lport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&rport->lock, flags);
+ list_del(&lsop->lsrcv_list);
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+
+ kfree(lsop);
+
+ nvme_fc_rport_put(rport);
+}
+
+static void
+nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct nvme_fc_lport *lport = rport->lport;
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
+ int ret;
+
+ fc_dma_sync_single_for_device(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+
+ ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport,
+ lsop->lsrsp);
+ if (ret) {
+ dev_warn(lport->dev,
+ "LLDD rejected LS RSP xmt: LS %d status %d\n",
+ w0->ls_cmd, ret);
+ nvme_fc_xmt_ls_rsp_done(lsop->lsrsp);
+ return;
+ }
+}
+
+static struct nvme_fc_ctrl *
+nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport,
+ struct nvmefc_ls_rcv_op *lsop)
+{
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ &lsop->rqstbuf->rq_dis_assoc;
+ struct nvme_fc_ctrl *ctrl, *ret = NULL;
+ struct nvmefc_ls_rcv_op *oldls = NULL;
+ u64 association_id = be64_to_cpu(rqst->associd.association_id);
+ unsigned long flags;
+
+ spin_lock_irqsave(&rport->lock, flags);
+
+ list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
+ if (!nvme_fc_ctrl_get(ctrl))
+ continue;
+ spin_lock(&ctrl->lock);
+ if (association_id == ctrl->association_id) {
+ oldls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = lsop;
+ ret = ctrl;
+ }
+ spin_unlock(&ctrl->lock);
+ if (ret)
+ /* leave the ctrl get reference */
+ break;
+ nvme_fc_ctrl_put(ctrl);
+ }
+
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ /* transmit a response for anything that was pending */
+ if (oldls) {
+ dev_info(rport->lport->dev,
+ "NVME-FC{%d}: Multiple Disconnect Association "
+ "LS's received\n", ctrl->cnum);
+ /* overwrite good response with bogus failure */
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
+ sizeof(*oldls->rspbuf),
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvme_fc_xmt_ls_rsp(oldls);
+ }
+
+ return ret;
+}
+
+/*
+ * returns true to mean LS handled and ls_rsp can be sent
+ * returns false to defer ls_rsp xmt (will be done as part of
+ * association termination)
+ */
+static bool
+nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct nvme_fc_rport *rport = lsop->rport;
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst =
+ &lsop->rqstbuf->rq_dis_assoc;
+ struct fcnvme_ls_disconnect_assoc_acc *acc =
+ &lsop->rspbuf->rsp_dis_assoc;
+ struct nvme_fc_ctrl *ctrl = NULL;
+ int ret = 0;
+
+ memset(acc, 0, sizeof(*acc));
+
+ ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst);
+ if (!ret) {
+ /* match an active association */
+ ctrl = nvme_fc_match_disconn_ls(rport, lsop);
+ if (!ctrl)
+ ret = VERR_NO_ASSOC;
+ }
+
+ if (ret) {
+ dev_info(rport->lport->dev,
+ "Disconnect LS failed: %s\n",
+ validation_errors[ret]);
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
+ (ret == VERR_NO_ASSOC) ?
+ FCNVME_RJT_RC_INV_ASSOC :
+ FCNVME_RJT_RC_LOGIC,
+ FCNVME_RJT_EXP_NONE, 0);
+ return true;
+ }
- discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
+ /* format an ACCept response */
- discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
- FCNVME_LSDESC_DISCONN_CMD);
- discon_rqst->discon_cmd.desc_len =
+ lsop->lsrsp->rsplen = sizeof(*acc);
+
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_disconn_cmd));
+ sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
+ FCNVME_LS_DISCONNECT_ASSOC);
- lsreq->rqstaddr = discon_rqst;
- lsreq->rqstlen = sizeof(*discon_rqst);
- lsreq->rspaddr = discon_acc;
- lsreq->rsplen = sizeof(*discon_acc);
- lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
+ /*
+ * the transmit of the response will occur after the exchanges
+ * for the association have been ABTS'd by
+ * nvme_fc_delete_association().
+ */
- ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
- nvme_fc_disconnect_assoc_done);
- if (ret)
- kfree(lsop);
+ /* fail the association */
+ nvme_fc_error_recovery(ctrl, "Disconnect Association LS received");
+
+ /* release the reference taken by nvme_fc_match_disconn_ls() */
+ nvme_fc_ctrl_put(ctrl);
+
+ return false;
}
+/*
+ * Actual Processing routine for received FC-NVME LS Requests from the LLD
+ * returns true if a response should be sent afterward, false if rsp will
+ * be sent asynchronously.
+ */
+static bool
+nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop)
+{
+ struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0;
+ bool ret = true;
+
+ lsop->lsrsp->nvme_fc_private = lsop;
+ lsop->lsrsp->rspbuf = lsop->rspbuf;
+ lsop->lsrsp->rspdma = lsop->rspdma;
+ lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done;
+ /* Be preventative. handlers will later set to valid length */
+ lsop->lsrsp->rsplen = 0;
-/* *********************** NVME Ctrl Routines **************************** */
+ /*
+ * handlers:
+ * parse request input, execute the request, and format the
+ * LS response
+ */
+ switch (w0->ls_cmd) {
+ case FCNVME_LS_DISCONNECT_ASSOC:
+ ret = nvme_fc_ls_disconnect_assoc(lsop);
+ break;
+ case FCNVME_LS_DISCONNECT_CONN:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ case FCNVME_LS_CREATE_ASSOCIATION:
+ case FCNVME_LS_CREATE_CONNECTION:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ default:
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf,
+ sizeof(*lsop->rspbuf), w0->ls_cmd,
+ FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
+ break;
+ }
-static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
+ return(ret);
+}
+
+static void
+nvme_fc_handle_ls_rqst_work(struct work_struct *work)
+{
+ struct nvme_fc_rport *rport =
+ container_of(work, struct nvme_fc_rport, lsrcv_work);
+ struct fcnvme_ls_rqst_w0 *w0;
+ struct nvmefc_ls_rcv_op *lsop;
+ unsigned long flags;
+ bool sendrsp;
+
+restart:
+ sendrsp = true;
+ spin_lock_irqsave(&rport->lock, flags);
+ list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) {
+ if (lsop->handled)
+ continue;
+
+ lsop->handled = true;
+ if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ sendrsp = nvme_fc_handle_ls_rqst(lsop);
+ } else {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ w0 = &lsop->rqstbuf->w0;
+ lsop->lsrsp->rsplen = nvme_fc_format_rjt(
+ lsop->rspbuf,
+ sizeof(*lsop->rspbuf),
+ w0->ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ }
+ if (sendrsp)
+ nvme_fc_xmt_ls_rsp(lsop);
+ goto restart;
+ }
+ spin_unlock_irqrestore(&rport->lock, flags);
+}
+
+/**
+ * nvme_fc_rcv_ls_req - transport entry point called by an LLDD
+ * upon the reception of a NVME LS request.
+ *
+ * The nvme-fc layer will copy payload to an internal structure for
+ * processing. As such, upon completion of the routine, the LLDD may
+ * immediately free/reuse the LS request buffer passed in the call.
+ *
+ * If this routine returns error, the LLDD should abort the exchange.
+ *
+ * @remoteport: pointer to the (registered) remote port that the LS
+ * was received from. The remoteport is associated with
+ * a specific localport.
+ * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be
+ * used to reference the exchange corresponding to the LS
+ * when issuing an ls response.
+ * @lsreqbuf: pointer to the buffer containing the LS Request
+ * @lsreqbuf_len: length, in bytes, of the received LS request
+ */
+int
+nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr,
+ struct nvmefc_ls_rsp *lsrsp,
+ void *lsreqbuf, u32 lsreqbuf_len)
+{
+ struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
+ struct nvme_fc_lport *lport = rport->lport;
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
+ struct nvmefc_ls_rcv_op *lsop;
+ unsigned long flags;
+ int ret;
+
+ nvme_fc_rport_get(rport);
+
+ /* validate there's a routine to transmit a response */
+ if (!lport->ops->xmt_ls_rsp) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: no LLDD xmt_ls_rsp\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: payload too large\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -E2BIG;
+ goto out_put;
+ }
+
+ lsop = kzalloc(sizeof(*lsop) +
+ sizeof(union nvmefc_ls_requests) +
+ sizeof(union nvmefc_ls_responses),
+ GFP_KERNEL);
+ if (!lsop) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: No memory\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -ENOMEM;
+ goto out_put;
+ }
+ lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1];
+ lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1];
+
+ lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf,
+ sizeof(*lsop->rspbuf),
+ DMA_TO_DEVICE);
+ if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) {
+ dev_info(lport->dev,
+ "RCV %s LS failed: DMA mapping failure\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
+ ret = -EFAULT;
+ goto out_free;
+ }
+
+ lsop->rport = rport;
+ lsop->lsrsp = lsrsp;
+
+ memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len);
+ lsop->rqstdatalen = lsreqbuf_len;
+
+ spin_lock_irqsave(&rport->lock, flags);
+ if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) {
+ spin_unlock_irqrestore(&rport->lock, flags);
+ ret = -ENOTCONN;
+ goto out_unmap;
+ }
+ list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list);
+ spin_unlock_irqrestore(&rport->lock, flags);
+
+ schedule_work(&rport->lsrcv_work);
+
+ return 0;
+
+out_unmap:
+ fc_dma_unmap_single(lport->dev, lsop->rspdma,
+ sizeof(*lsop->rspbuf), DMA_TO_DEVICE);
+out_free:
+ kfree(lsop);
+out_put:
+ nvme_fc_rport_put(rport);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req);
+
+
+/* *********************** NVME Ctrl Routines **************************** */
static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
@@ -1500,7 +1835,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
if (opstate != FCPOP_STATE_ACTIVE)
atomic_set(&op->state, opstate);
- else if (ctrl->flags & FCCTRL_TERMIO)
+ else if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
ctrl->iocnt++;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -1537,7 +1872,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
if (opstate == FCPOP_STATE_ABORTED) {
spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO) {
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) {
if (!--ctrl->iocnt)
wake_up(&ctrl->ioabort_wait);
}
@@ -1771,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
if (res)
return res;
- op->op.fcp_req.first_sgl = &op->sgl[0];
+ op->op.fcp_req.first_sgl = op->sgl;
op->op.fcp_req.private = &op->priv[0];
nvme_req(rq)->ctrl = &ctrl->ctrl;
return res;
@@ -1783,15 +2118,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_fcp_op *aen_op;
struct nvme_fc_cmd_iu *cmdiu;
struct nvme_command *sqe;
- void *private;
+ void *private = NULL;
int i, ret;
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
+ if (ctrl->lport->ops->fcprqst_priv_sz) {
+ private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
GFP_KERNEL);
- if (!private)
- return -ENOMEM;
+ if (!private)
+ return -ENOMEM;
+ }
cmdiu = &aen_op->cmd_iu;
sqe = &cmdiu->sqe;
@@ -1822,9 +2159,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
aen_op = ctrl->aen_ops;
for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
- if (!aen_op->fcp_req.private)
- continue;
-
__nvme_fc_exit_request(ctrl, aen_op);
kfree(aen_op->fcp_req.private);
@@ -2366,16 +2700,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
struct nvme_fc_fcp_op *aen_op;
- unsigned long flags;
- bool terminating = false;
blk_status_t ret;
- spin_lock_irqsave(&ctrl->lock, flags);
- if (ctrl->flags & FCCTRL_TERMIO)
- terminating = true;
- spin_unlock_irqrestore(&ctrl->lock, flags);
-
- if (terminating)
+ if (test_bit(FCCTRL_TERMIO, &ctrl->flags))
return;
aen_op = &ctrl->aen_ops[0];
@@ -2584,10 +2911,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_rport *rport = ctrl->rport;
u32 cnt;
- if (ctrl->assoc_active)
+ if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags))
return 1;
- ctrl->assoc_active = true;
cnt = atomic_inc_return(&rport->act_ctrl_cnt);
if (cnt == 1)
nvme_fc_rport_active_on_lport(rport);
@@ -2602,7 +2928,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl)
struct nvme_fc_lport *lport = rport->lport;
u32 cnt;
- /* ctrl->assoc_active=false will be set independently */
+ /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */
cnt = atomic_dec_return(&rport->act_ctrl_cnt);
if (cnt == 0) {
@@ -2622,6 +2948,8 @@ static int
nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ struct nvmefc_ls_rcv_op *disls = NULL;
+ unsigned long flags;
int ret;
bool changed;
@@ -2739,12 +3067,18 @@ out_term_aen_ops:
out_disconnect_admin_queue:
/* send a Disconnect(association) LS to fc-nvme target */
nvme_fc_xmt_disconnect_assoc(ctrl);
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
+ disls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = NULL;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ if (disls)
+ nvme_fc_xmt_ls_rsp(disls);
out_delete_hw_queue:
__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
out_free_queue:
nvme_fc_free_queue(&ctrl->queues[0]);
- ctrl->assoc_active = false;
+ clear_bit(ASSOC_ACTIVE, &ctrl->flags);
nvme_fc_ctlr_inactive_on_rport(ctrl);
return ret;
@@ -2759,14 +3093,14 @@ out_free_queue:
static void
nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
{
+ struct nvmefc_ls_rcv_op *disls = NULL;
unsigned long flags;
- if (!ctrl->assoc_active)
+ if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags))
return;
- ctrl->assoc_active = false;
spin_lock_irqsave(&ctrl->lock, flags);
- ctrl->flags |= FCCTRL_TERMIO;
+ set_bit(FCCTRL_TERMIO, &ctrl->flags);
ctrl->iocnt = 0;
spin_unlock_irqrestore(&ctrl->lock, flags);
@@ -2817,7 +3151,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
/* wait for all io that had to be aborted */
spin_lock_irq(&ctrl->lock);
wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
- ctrl->flags &= ~FCCTRL_TERMIO;
+ clear_bit(FCCTRL_TERMIO, &ctrl->flags);
spin_unlock_irq(&ctrl->lock);
nvme_fc_term_aen_ops(ctrl);
@@ -2831,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
if (ctrl->association_id)
nvme_fc_xmt_disconnect_assoc(ctrl);
+ spin_lock_irqsave(&ctrl->lock, flags);
ctrl->association_id = 0;
+ disls = ctrl->rcv_disconn;
+ ctrl->rcv_disconn = NULL;
+ spin_unlock_irqrestore(&ctrl->lock, flags);
+ if (disls)
+ /*
+ * if a Disconnect Request was waiting for a response, send
+ * now that all ABTS's have been issued (and are complete).
+ */
+ nvme_fc_xmt_ls_rsp(disls);
if (ctrl->ctrl.tagset) {
nvme_fc_delete_hw_io_queues(ctrl);
@@ -2902,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
dev_warn(ctrl->ctrl.device,
"NVME-FC{%d}: dev_loss_tmo (%d) expired "
"while waiting for remoteport connectivity.\n",
- ctrl->cnum, portptr->dev_loss_tmo);
+ ctrl->cnum, min_t(int, portptr->dev_loss_tmo,
+ (ctrl->ctrl.opts->max_reconnects *
+ ctrl->ctrl.opts->reconnect_delay)));
WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
}
}
@@ -3089,7 +3435,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
ctrl->dev = lport->dev;
ctrl->cnum = idx;
ctrl->ioq_live = false;
- ctrl->assoc_active = false;
atomic_set(&ctrl->err_work_active, 0);
init_waitqueue_head(&ctrl->ioabort_wait);
diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h
new file mode 100644
index 000000000000..05ce566f2caf
--- /dev/null
+++ b/drivers/nvme/host/fc.h
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2016, Avago Technologies
+ */
+
+#ifndef _NVME_FC_TRANSPORT_H
+#define _NVME_FC_TRANSPORT_H 1
+
+
+/*
+ * Common definitions between the nvme_fc (host) transport and
+ * nvmet_fc (target) transport implementation.
+ */
+
+/*
+ * ****************** FC-NVME LS HANDLING ******************
+ */
+
+union nvmefc_ls_requests {
+ struct fcnvme_ls_rqst_w0 w0;
+ struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc;
+ struct fcnvme_ls_cr_conn_rqst rq_cr_conn;
+ struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc;
+ struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn;
+} __aligned(128); /* alignment for other things alloc'd with */
+
+union nvmefc_ls_responses {
+ struct fcnvme_ls_rjt rsp_rjt;
+ struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc;
+ struct fcnvme_ls_cr_conn_acc rsp_cr_conn;
+ struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc;
+ struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn;
+} __aligned(128); /* alignment for other things alloc'd with */
+
+static inline void
+nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
+{
+ struct fcnvme_ls_acc_hdr *acc = buf;
+
+ acc->w0.ls_cmd = ls_cmd;
+ acc->desc_list_len = desc_len;
+ acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
+ acc->rqst.desc_len =
+ fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
+ acc->rqst.w0.ls_cmd = rqst_ls_cmd;
+}
+
+static inline int
+nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
+ u8 reason, u8 explanation, u8 vendor)
+{
+ struct fcnvme_ls_rjt *rjt = buf;
+
+ nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
+ fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
+ ls_cmd);
+ rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
+ rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
+ rjt->rjt.reason_code = reason;
+ rjt->rjt.reason_explanation = explanation;
+ rjt->rjt.vendor = vendor;
+
+ return sizeof(struct fcnvme_ls_rjt);
+}
+
+/* Validation Error indexes into the string table below */
+enum {
+ VERR_NO_ERROR = 0,
+ VERR_CR_ASSOC_LEN = 1,
+ VERR_CR_ASSOC_RQST_LEN = 2,
+ VERR_CR_ASSOC_CMD = 3,
+ VERR_CR_ASSOC_CMD_LEN = 4,
+ VERR_ERSP_RATIO = 5,
+ VERR_ASSOC_ALLOC_FAIL = 6,
+ VERR_QUEUE_ALLOC_FAIL = 7,
+ VERR_CR_CONN_LEN = 8,
+ VERR_CR_CONN_RQST_LEN = 9,
+ VERR_ASSOC_ID = 10,
+ VERR_ASSOC_ID_LEN = 11,
+ VERR_NO_ASSOC = 12,
+ VERR_CONN_ID = 13,
+ VERR_CONN_ID_LEN = 14,
+ VERR_INVAL_CONN = 15,
+ VERR_CR_CONN_CMD = 16,
+ VERR_CR_CONN_CMD_LEN = 17,
+ VERR_DISCONN_LEN = 18,
+ VERR_DISCONN_RQST_LEN = 19,
+ VERR_DISCONN_CMD = 20,
+ VERR_DISCONN_CMD_LEN = 21,
+ VERR_DISCONN_SCOPE = 22,
+ VERR_RS_LEN = 23,
+ VERR_RS_RQST_LEN = 24,
+ VERR_RS_CMD = 25,
+ VERR_RS_CMD_LEN = 26,
+ VERR_RS_RCTL = 27,
+ VERR_RS_RO = 28,
+ VERR_LSACC = 29,
+ VERR_LSDESC_RQST = 30,
+ VERR_LSDESC_RQST_LEN = 31,
+ VERR_CR_ASSOC = 32,
+ VERR_CR_ASSOC_ACC_LEN = 33,
+ VERR_CR_CONN = 34,
+ VERR_CR_CONN_ACC_LEN = 35,
+ VERR_DISCONN = 36,
+ VERR_DISCONN_ACC_LEN = 37,
+};
+
+static char *validation_errors[] = {
+ "OK",
+ "Bad CR_ASSOC Length",
+ "Bad CR_ASSOC Rqst Length",
+ "Not CR_ASSOC Cmd",
+ "Bad CR_ASSOC Cmd Length",
+ "Bad Ersp Ratio",
+ "Association Allocation Failed",
+ "Queue Allocation Failed",
+ "Bad CR_CONN Length",
+ "Bad CR_CONN Rqst Length",
+ "Not Association ID",
+ "Bad Association ID Length",
+ "No Association",
+ "Not Connection ID",
+ "Bad Connection ID Length",
+ "Invalid Connection ID",
+ "Not CR_CONN Cmd",
+ "Bad CR_CONN Cmd Length",
+ "Bad DISCONN Length",
+ "Bad DISCONN Rqst Length",
+ "Not DISCONN Cmd",
+ "Bad DISCONN Cmd Length",
+ "Bad Disconnect Scope",
+ "Bad RS Length",
+ "Bad RS Rqst Length",
+ "Not RS Cmd",
+ "Bad RS Cmd Length",
+ "Bad RS R_CTL",
+ "Bad RS Relative Offset",
+ "Not LS_ACC",
+ "Not LSDESC_RQST",
+ "Bad LSDESC_RQST Length",
+ "Not CR_ASSOC Rqst",
+ "Bad CR_ASSOC ACC Length",
+ "Not CR_CONN Rqst",
+ "Bad CR_CONN ACC Length",
+ "Not Disconnect Rqst",
+ "Bad Disconnect ACC Length",
+};
+
+#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN
+
+static char *nvmefc_ls_names[] = {
+ "Reserved (0)",
+ "RJT (1)",
+ "ACC (2)",
+ "Create Association",
+ "Create Connection",
+ "Disconnect Association",
+ "Disconnect Connection",
+};
+
+static inline void
+nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq,
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst,
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc,
+ u64 association_id)
+{
+ lsreq->rqstaddr = discon_rqst;
+ lsreq->rqstlen = sizeof(*discon_rqst);
+ lsreq->rspaddr = discon_acc;
+ lsreq->rsplen = sizeof(*discon_acc);
+ lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC;
+
+ discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC;
+ discon_rqst->desc_list_len = cpu_to_be32(
+ sizeof(struct fcnvme_lsdesc_assoc_id) +
+ sizeof(struct fcnvme_lsdesc_disconn_cmd));
+
+ discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
+ discon_rqst->associd.desc_len =
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_assoc_id));
+
+ discon_rqst->associd.association_id = cpu_to_be64(association_id);
+
+ discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
+ FCNVME_LSDESC_DISCONN_CMD);
+ discon_rqst->discon_cmd.desc_len =
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_disconn_cmd));
+}
+
+static inline int
+nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen,
+ struct fcnvme_ls_disconnect_assoc_rqst *rqst)
+{
+ int ret = 0;
+
+ if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
+ ret = VERR_DISCONN_LEN;
+ else if (rqst->desc_list_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
+ ret = VERR_DISCONN_RQST_LEN;
+ else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
+ ret = VERR_ASSOC_ID;
+ else if (rqst->associd.desc_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_assoc_id)))
+ ret = VERR_ASSOC_ID_LEN;
+ else if (rqst->discon_cmd.desc_tag !=
+ cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
+ ret = VERR_DISCONN_CMD;
+ else if (rqst->discon_cmd.desc_len !=
+ fcnvme_lsdesc_len(
+ sizeof(struct fcnvme_lsdesc_disconn_cmd)))
+ ret = VERR_DISCONN_CMD_LEN;
+ /*
+ * As the standard changed on the LS, check if old format and scope
+ * something other than Association (e.g. 0).
+ */
+ else if (rqst->discon_cmd.rsvd8[0])
+ ret = VERR_DISCONN_SCOPE;
+
+ return ret;
+}
+
+#endif /* _NVME_FC_TRANSPORT_H */
diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
index ec46693f6b64..69608755d415 100644
--- a/drivers/nvme/host/lightnvm.c
+++ b/drivers/nvme/host/lightnvm.c
@@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl {
__le32 tdresv;
__le32 thresv;
__le32 rsvd2[8];
- __u8 blk[0];
+ __u8 blk[];
};
struct nvme_nvm_id20_addrf {
@@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node)
geo = &dev->geo;
geo->csecs = 1 << ns->lba_shift;
geo->sos = ns->ms;
- geo->ext = ns->ext;
+ if (ns->features & NVME_NS_EXT_LBAS)
+ geo->ext = true;
+ else
+ geo->ext = false;
geo->mdts = ns->ctrl->max_hw_sectors;
dev->q = q;
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 54603bd3e02d..da78e499947a 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -3,6 +3,7 @@
* Copyright (c) 2017-2018 Christoph Hellwig.
*/
+#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"
@@ -293,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head)
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
struct bio *bio)
{
- struct nvme_ns_head *head = q->queuedata;
+ struct nvme_ns_head *head = bio->bi_disk->private_data;
struct device *dev = disk_to_dev(head->disk);
struct nvme_ns *ns;
blk_qc_t ret = BLK_QC_T_NONE;
@@ -371,13 +372,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
* We also do this for private namespaces as the namespace sharing data could
* change after a rescan.
*/
- if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
+ if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath)
return 0;
q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node);
if (!q)
goto out;
- q->queuedata = head;
blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
/* set to a default value for 512 until disk is validated */
blk_queue_logical_block_size(q, 512);
@@ -666,6 +666,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id)
nvme_mpath_set_live(ns);
mutex_unlock(&ns->head->lock);
}
+
+ if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) {
+ struct backing_dev_info *info =
+ ns->head->disk->queue->backing_dev_info;
+
+ info->capabilities |= BDI_CAP_STABLE_WRITES;
+ }
}
void nvme_mpath_remove_disk(struct nvme_ns_head *head)
@@ -687,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
int error;
/* check if multipath is enabled and we have the capability */
- if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
+ if (!multipath || !ctrl->subsys ||
+ !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
return 0;
ctrl->anacap = id->anacap;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 2e04a36296d9..fa5c75501049 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -16,6 +16,7 @@
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>
+#include <linux/t10-pi.h>
#include <trace/events/block.h>
@@ -30,8 +31,10 @@ extern unsigned int admin_timeout;
#ifdef CONFIG_ARCH_NO_SG_CHAIN
#define NVME_INLINE_SG_CNT 0
+#define NVME_INLINE_METADATA_SG_CNT 0
#else
#define NVME_INLINE_SG_CNT 2
+#define NVME_INLINE_METADATA_SG_CNT 1
#endif
extern struct workqueue_struct *nvme_wq;
@@ -228,6 +231,7 @@ struct nvme_ctrl {
u32 page_size;
u32 max_hw_sectors;
u32 max_segments;
+ u32 max_integrity_segments;
u16 crdt[3];
u16 oncs;
u16 oacs;
@@ -352,6 +356,7 @@ struct nvme_ns_head {
struct nvme_ns_ids ids;
struct list_head entry;
struct kref ref;
+ bool shared;
int instance;
#ifdef CONFIG_NVME_MULTIPATH
struct gendisk *disk;
@@ -363,6 +368,11 @@ struct nvme_ns_head {
#endif
};
+enum nvme_ns_features {
+ NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */
+ NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */
+};
+
struct nvme_ns {
struct list_head list;
@@ -382,18 +392,23 @@ struct nvme_ns {
u16 ms;
u16 sgs;
u32 sws;
- bool ext;
u8 pi_type;
+ unsigned long features;
unsigned long flags;
#define NVME_NS_REMOVING 0
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
- u16 noiob;
struct nvme_fault_inject fault_inject;
};
+/* NVMe ns supports metadata actions by the controller (generate/strip) */
+static inline bool nvme_ns_has_pi(struct nvme_ns *ns)
+{
+ return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple);
+}
+
struct nvme_ctrl_ops {
const char *name;
struct module *module;
@@ -449,6 +464,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
return lba << (ns->lba_shift - SECTOR_SHIFT);
}
+/*
+ * Convert byte length to nvme's 0-based num dwords
+ */
+static inline u32 nvme_bytes_to_numd(size_t len)
+{
+ return (len >> 2) - 1;
+}
+
static inline void nvme_end_request(struct request *req, __le16 status,
union nvme_result result)
{
@@ -489,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
-void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);
void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index cc46e250fcac..d690d5593a80 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -68,14 +68,30 @@ static int io_queue_depth = 1024;
module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+static int io_queue_count_set(const char *val, const struct kernel_param *kp)
+{
+ unsigned int n;
+ int ret;
+
+ ret = kstrtouint(val, 10, &n);
+ if (ret != 0 || n > num_possible_cpus())
+ return -EINVAL;
+ return param_set_uint(val, kp);
+}
+
+static const struct kernel_param_ops io_queue_count_ops = {
+ .set = io_queue_count_set,
+ .get = param_get_uint,
+};
+
static unsigned int write_queues;
-module_param(write_queues, uint, 0644);
+module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644);
MODULE_PARM_DESC(write_queues,
"Number of queues to use for writes. If not set, reads and writes "
"will share a queue set.");
static unsigned int poll_queues;
-module_param(poll_queues, uint, 0644);
+module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
struct nvme_dev;
@@ -128,6 +144,9 @@ struct nvme_dev {
dma_addr_t host_mem_descs_dma;
struct nvme_host_mem_buf_desc *host_mem_descs;
void **host_mem_desc_bufs;
+ unsigned int nr_allocated_queues;
+ unsigned int nr_write_queues;
+ unsigned int nr_poll_queues;
};
static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
@@ -166,14 +185,13 @@ struct nvme_queue {
void *sq_cmds;
/* only used for poll queues: */
spinlock_t cq_poll_lock ____cacheline_aligned_in_smp;
- volatile struct nvme_completion *cqes;
+ struct nvme_completion *cqes;
dma_addr_t sq_dma_addr;
dma_addr_t cq_dma_addr;
u32 __iomem *q_db;
u16 q_depth;
u16 cq_vector;
u16 sq_tail;
- u16 last_sq_tail;
u16 cq_head;
u16 qid;
u8 cq_phase;
@@ -209,25 +227,14 @@ struct nvme_iod {
struct scatterlist *sg;
};
-static unsigned int max_io_queues(void)
+static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev)
{
- return num_possible_cpus() + write_queues + poll_queues;
-}
-
-static unsigned int max_queue_count(void)
-{
- /* IO queues + admin queue */
- return 1 + max_io_queues();
-}
-
-static inline unsigned int nvme_dbbuf_size(u32 stride)
-{
- return (max_queue_count() * 8 * stride);
+ return dev->nr_allocated_queues * 8 * dev->db_stride;
}
static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
{
- unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+ unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs)
return 0;
@@ -252,7 +259,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev)
static void nvme_dbbuf_dma_free(struct nvme_dev *dev)
{
- unsigned int mem_size = nvme_dbbuf_size(dev->db_stride);
+ unsigned int mem_size = nvme_dbbuf_size(dev);
if (dev->dbbuf_dbs) {
dma_free_coherent(dev->dev, mem_size,
@@ -446,24 +453,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
return 0;
}
-/*
- * Write sq tail if we are asked to, or if the next command would wrap.
- */
-static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq)
+static inline void nvme_write_sq_db(struct nvme_queue *nvmeq)
{
- if (!write_sq) {
- u16 next_tail = nvmeq->sq_tail + 1;
-
- if (next_tail == nvmeq->q_depth)
- next_tail = 0;
- if (next_tail != nvmeq->last_sq_tail)
- return;
- }
-
if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail,
nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei))
writel(nvmeq->sq_tail, nvmeq->q_db);
- nvmeq->last_sq_tail = nvmeq->sq_tail;
}
/**
@@ -480,7 +474,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd,
cmd, sizeof(*cmd));
if (++nvmeq->sq_tail == nvmeq->q_depth)
nvmeq->sq_tail = 0;
- nvme_write_sq_db(nvmeq, write_sq);
+ if (write_sq)
+ nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@@ -489,8 +484,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx)
struct nvme_queue *nvmeq = hctx->driver_data;
spin_lock(&nvmeq->sq_lock);
- if (nvmeq->sq_tail != nvmeq->last_sq_tail)
- nvme_write_sq_db(nvmeq, true);
+ nvme_write_sq_db(nvmeq);
spin_unlock(&nvmeq->sq_lock);
}
@@ -922,8 +916,9 @@ static void nvme_pci_complete_rq(struct request *req)
/* We read the CQE phase first to check if the rest of the entry is valid */
static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq)
{
- return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) ==
- nvmeq->cq_phase;
+ struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head];
+
+ return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase;
}
static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq)
@@ -944,7 +939,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq)
static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx)
{
- volatile struct nvme_completion *cqe = &nvmeq->cqes[idx];
+ struct nvme_completion *cqe = &nvmeq->cqes[idx];
struct request *req;
if (unlikely(cqe->command_id >= nvmeq->q_depth)) {
@@ -1501,7 +1496,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
struct nvme_dev *dev = nvmeq->dev;
nvmeq->sq_tail = 0;
- nvmeq->last_sq_tail = 0;
nvmeq->cq_head = 0;
nvmeq->cq_phase = 1;
nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
@@ -2003,7 +1997,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
{
struct nvme_dev *dev = affd->priv;
- unsigned int nr_read_queues;
+ unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues;
/*
* If there is no interupt available for queues, ensure that
@@ -2019,12 +2013,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs)
if (!nrirqs) {
nrirqs = 1;
nr_read_queues = 0;
- } else if (nrirqs == 1 || !write_queues) {
+ } else if (nrirqs == 1 || !nr_write_queues) {
nr_read_queues = 0;
- } else if (write_queues >= nrirqs) {
+ } else if (nr_write_queues >= nrirqs) {
nr_read_queues = 1;
} else {
- nr_read_queues = nrirqs - write_queues;
+ nr_read_queues = nrirqs - nr_write_queues;
}
dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues;
@@ -2048,7 +2042,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
* Poll queues don't need interrupts, but we need at least one IO
* queue left over for non-polled IO.
*/
- this_p_queues = poll_queues;
+ this_p_queues = dev->nr_poll_queues;
if (this_p_queues >= nr_io_queues) {
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
@@ -2078,14 +2072,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev)
__nvme_disable_io_queues(dev, nvme_admin_delete_cq);
}
+static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
+{
+ return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues;
+}
+
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = &dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, nr_io_queues;
+ unsigned int nr_io_queues;
unsigned long size;
+ int result;
- nr_io_queues = max_io_queues();
+ /*
+ * Sample the module parameters once at reset time so that we have
+ * stable values to work with.
+ */
+ dev->nr_write_queues = write_queues;
+ dev->nr_poll_queues = poll_queues;
/*
* If tags are shared with admin queue (Apple bug), then
@@ -2093,6 +2098,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
*/
if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS)
nr_io_queues = 1;
+ else
+ nr_io_queues = min(nvme_max_io_queues(dev),
+ dev->nr_allocated_queues - 1);
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
if (result < 0)
@@ -2565,6 +2573,12 @@ static void nvme_reset_work(struct work_struct *work)
goto out;
}
+ /*
+ * We do not support an SGL for metadata (yet), so we are limited to a
+ * single integrity segment for the separate metadata pointer.
+ */
+ dev->ctrl.max_integrity_segments = 1;
+
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@@ -2767,8 +2781,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (!dev)
return -ENOMEM;
- dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue),
- GFP_KERNEL, node);
+ dev->nr_write_queues = write_queues;
+ dev->nr_poll_queues = poll_queues;
+ dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1;
+ dev->queues = kcalloc_node(dev->nr_allocated_queues,
+ sizeof(struct nvme_queue), GFP_KERNEL, node);
if (!dev->queues)
goto free;
@@ -3131,8 +3148,6 @@ static int __init nvme_init(void)
BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
- write_queues = min(write_queues, num_possible_cpus());
- poll_queues = min(poll_queues, num_possible_cpus());
return pci_register_driver(&nvme_driver);
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index cac8a930396a..f8f856dc0c67 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -34,6 +34,11 @@
#define NVME_RDMA_MAX_INLINE_SEGMENTS 4
+#define NVME_RDMA_DATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT)
+#define NVME_RDMA_METADATA_SGL_SIZE \
+ (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT)
+
struct nvme_rdma_device {
struct ib_device *dev;
struct ib_pd *pd;
@@ -48,6 +53,11 @@ struct nvme_rdma_qe {
u64 dma;
};
+struct nvme_rdma_sgl {
+ int nents;
+ struct sg_table sg_table;
+};
+
struct nvme_rdma_queue;
struct nvme_rdma_request {
struct nvme_request req;
@@ -58,12 +68,12 @@ struct nvme_rdma_request {
refcount_t ref;
struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
u32 num_sge;
- int nents;
struct ib_reg_wr reg_wr;
struct ib_cqe reg_cqe;
struct nvme_rdma_queue *queue;
- struct sg_table sg_table;
- struct scatterlist first_sgl[];
+ struct nvme_rdma_sgl data_sgl;
+ struct nvme_rdma_sgl *metadata_sgl;
+ bool use_sig_mr;
};
enum nvme_rdma_queue_flags {
@@ -85,6 +95,7 @@ struct nvme_rdma_queue {
struct rdma_cm_id *cm_id;
int cm_error;
struct completion cm_done;
+ bool pi_support;
};
struct nvme_rdma_ctrl {
@@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
init_attr.qp_type = IB_QPT_RC;
init_attr.send_cq = queue->ib_cq;
init_attr.recv_cq = queue->ib_cq;
+ if (queue->pi_support)
+ init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr);
@@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
if (!req->sqe.data)
return -ENOMEM;
+ /* metadata nvme_rdma_sgl struct is located after command's data SGL */
+ if (queue->pi_support)
+ req->metadata_sgl = (void *)nvme_req(rq) +
+ sizeof(struct nvme_rdma_request) +
+ NVME_RDMA_DATA_SGL_SIZE;
+
req->queue = queue;
return 0;
@@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
dev = queue->device;
ibdev = dev->dev;
+ if (queue->pi_support)
+ ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs);
ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
/*
@@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
nvme_rdma_dev_put(dev);
}
-static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support)
{
- return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
- ibdev->attrs.max_fast_reg_page_list_len - 1);
+ u32 max_page_list_len;
+
+ if (pi_support)
+ max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len;
+ else
+ max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len;
+
+ return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1);
}
static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
* misaligned we'll end up using two entries for a single data page,
* so one additional entry is required.
*/
- pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
+ pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1;
ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
queue->queue_size,
IB_MR_TYPE_MEM_REG,
@@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
goto out_destroy_ring;
}
+ if (queue->pi_support) {
+ ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs,
+ queue->queue_size, IB_MR_TYPE_INTEGRITY,
+ pages_per_mr, pages_per_mr);
+ if (ret) {
+ dev_err(queue->ctrl->ctrl.device,
+ "failed to initialize PI MR pool sized %d for QID %d\n",
+ queue->queue_size, idx);
+ goto out_destroy_mr_pool;
+ }
+ }
+
set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
return 0;
+out_destroy_mr_pool:
+ ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
out_destroy_ring:
nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
sizeof(struct nvme_completion), DMA_FROM_DEVICE);
@@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
queue = &ctrl->queues[idx];
queue->ctrl = ctrl;
+ if (idx && ctrl->ctrl.max_integrity_segments)
+ queue->pi_support = true;
+ else
+ queue->pi_support = false;
init_completion(&queue->cm_done);
if (idx > 0)
@@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = nctrl->numa_node;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = 1;
set->timeout = ADMIN_TIMEOUT;
@@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
set->numa_node = nctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
set->cmd_size = sizeof(struct nvme_rdma_request) +
- NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
+ NVME_RDMA_DATA_SGL_SIZE;
+ if (nctrl->max_integrity_segments)
+ set->cmd_size += sizeof(struct nvme_rdma_sgl) +
+ NVME_RDMA_METADATA_SGL_SIZE;
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
set->timeout = NVME_IO_TIMEOUT;
@@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
bool new)
{
+ bool pi_capable = false;
int error;
error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH);
@@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->device = ctrl->queues[0].device;
ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device);
- ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
+ /* T10-PI support */
+ if (ctrl->device->dev->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)
+ pi_capable = true;
+
+ ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev,
+ pi_capable);
/*
* Bind the async event SQE DMA mapping to the admin queue lifetime.
@@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
ctrl->ctrl.max_segments = ctrl->max_fr_pages;
ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
+ if (pi_capable)
+ ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages;
+ else
+ ctrl->ctrl.max_integrity_segments = 0;
blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ struct list_head *pool = &queue->qp->rdma_mrs;
if (!blk_rq_nr_phys_segments(rq))
return;
+ if (blk_integrity_rq(rq)) {
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
+ }
+
+ if (req->use_sig_mr)
+ pool = &queue->qp->sig_mrs;
+
if (req->mr) {
- ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+ ib_mr_pool_put(queue->qp, pool, req->mr);
req->mr = NULL;
}
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
}
static int nvme_rdma_set_sg_null(struct nvme_command *c)
@@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue,
int count)
{
struct nvme_sgl_desc *sg = &c->common.dptr.sgl;
- struct scatterlist *sgl = req->sg_table.sgl;
+ struct scatterlist *sgl = req->data_sgl.sg_table.sgl;
struct ib_sge *sge = &req->sge[1];
u32 len = 0;
int i;
@@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue,
{
struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
- sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl));
- put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length);
+ sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl));
+ put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length);
put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key);
sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
return 0;
@@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
* Align the MR to a 4K page size to match the ctrl page size and
* the block virtual boundary.
*/
- nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
+ nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL,
+ SZ_4K);
if (unlikely(nr < count)) {
ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
req->mr = NULL;
@@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
return 0;
}
+static void nvme_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs,
+ u8 pi_type)
+{
+ u16 control = le16_to_cpu(cmd->rw.control);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no memory domain */
+ sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE;
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+}
+
+static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask)
+{
+ *mask = 0;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF)
+ *mask |= IB_SIG_CHECK_REFTAG;
+ if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD)
+ *mask |= IB_SIG_CHECK_GUARD;
+}
+
+static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ if (unlikely(wc->status != IB_WC_SUCCESS))
+ nvme_rdma_wr_error(cq, wc, "SIG");
+}
+
+static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue,
+ struct nvme_rdma_request *req, struct nvme_command *c,
+ int count, int pi_count)
+{
+ struct nvme_rdma_sgl *sgl = &req->data_sgl;
+ struct ib_reg_wr *wr = &req->reg_wr;
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct nvme_ns *ns = rq->q->queuedata;
+ struct bio *bio = rq->bio;
+ struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
+ int nr;
+
+ req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs);
+ if (WARN_ON_ONCE(!req->mr))
+ return -EAGAIN;
+
+ nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL,
+ req->metadata_sgl->sg_table.sgl, pi_count, NULL,
+ SZ_4K);
+ if (unlikely(nr))
+ goto mr_put;
+
+ nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c,
+ req->mr->sig_attrs, ns->pi_type);
+ nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask);
+
+ ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey));
+
+ req->reg_cqe.done = nvme_rdma_sig_done;
+ memset(wr, 0, sizeof(*wr));
+ wr->wr.opcode = IB_WR_REG_MR_INTEGRITY;
+ wr->wr.wr_cqe = &req->reg_cqe;
+ wr->wr.num_sge = 0;
+ wr->wr.send_flags = 0;
+ wr->mr = req->mr;
+ wr->key = req->mr->rkey;
+ wr->access = IB_ACCESS_LOCAL_WRITE |
+ IB_ACCESS_REMOTE_READ |
+ IB_ACCESS_REMOTE_WRITE;
+
+ sg->addr = cpu_to_le64(req->mr->iova);
+ put_unaligned_le24(req->mr->length, sg->length);
+ put_unaligned_le32(req->mr->rkey, sg->key);
+ sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4;
+
+ return 0;
+
+mr_put:
+ ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr);
+ req->mr = NULL;
+ if (nr < 0)
+ return nr;
+ return -EINVAL;
+}
+
static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct request *rq, struct nvme_command *c)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
+ int pi_count = 0;
int count, ret;
req->num_sge = 1;
@@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
if (!blk_rq_nr_phys_segments(rq))
return nvme_rdma_set_sg_null(c);
- req->sg_table.sgl = req->first_sgl;
- ret = sg_alloc_table_chained(&req->sg_table,
- blk_rq_nr_phys_segments(rq), req->sg_table.sgl,
+ req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1);
+ ret = sg_alloc_table_chained(&req->data_sgl.sg_table,
+ blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl,
NVME_INLINE_SG_CNT);
if (ret)
return -ENOMEM;
- req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
+ req->data_sgl.nents = blk_rq_map_sg(rq->q, rq,
+ req->data_sgl.sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
- rq_dma_dir(rq));
+ count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl,
+ req->data_sgl.nents, rq_dma_dir(rq));
if (unlikely(count <= 0)) {
ret = -EIO;
goto out_free_table;
}
+ if (blk_integrity_rq(rq)) {
+ req->metadata_sgl->sg_table.sgl =
+ (struct scatterlist *)(req->metadata_sgl + 1);
+ ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table,
+ blk_rq_count_integrity_sg(rq->q, rq->bio),
+ req->metadata_sgl->sg_table.sgl,
+ NVME_INLINE_METADATA_SG_CNT);
+ if (unlikely(ret)) {
+ ret = -ENOMEM;
+ goto out_unmap_sg;
+ }
+
+ req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q,
+ rq->bio, req->metadata_sgl->sg_table.sgl);
+ pi_count = ib_dma_map_sg(ibdev,
+ req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents,
+ rq_dma_dir(rq));
+ if (unlikely(pi_count <= 0)) {
+ ret = -EIO;
+ goto out_free_pi_table;
+ }
+ }
+
+ if (req->use_sig_mr) {
+ ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count);
+ goto out;
+ }
+
if (count <= dev->num_inline_segments) {
if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
queue->ctrl->use_inline_data &&
@@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
ret = nvme_rdma_map_sg_fr(queue, req, c, count);
out:
if (unlikely(ret))
- goto out_unmap_sg;
+ goto out_unmap_pi_sg;
return 0;
+out_unmap_pi_sg:
+ if (blk_integrity_rq(rq))
+ ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl,
+ req->metadata_sgl->nents, rq_dma_dir(rq));
+out_free_pi_table:
+ if (blk_integrity_rq(rq))
+ sg_free_table_chained(&req->metadata_sgl->sg_table,
+ NVME_INLINE_METADATA_SG_CNT);
out_unmap_sg:
- ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq));
+ ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents,
+ rq_dma_dir(rq));
out_free_table:
- sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT);
+ sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT);
return ret;
}
@@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ queue->pi_support &&
+ (c->common.opcode == nvme_cmd_write ||
+ c->common.opcode == nvme_cmd_read) &&
+ nvme_ns_has_pi(ns))
+ req->use_sig_mr = true;
+ else
+ req->use_sig_mr = false;
+
err = nvme_rdma_map_data(queue, rq, c);
if (unlikely(err < 0)) {
dev_err(queue->ctrl->ctrl.device,
@@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
return ib_process_cq_direct(queue->ib_cq, -1);
}
+static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req)
+{
+ struct request *rq = blk_mq_rq_from_pdu(req);
+ struct ib_mr_status mr_status;
+ int ret;
+
+ ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ nvme_req(rq)->status = NVME_SC_INVALID_PI;
+ return;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ nvme_req(rq)->status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ nvme_req(rq)->status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ nvme_req(rq)->status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type, mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+}
+
static void nvme_rdma_complete_rq(struct request *rq)
{
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_queue *queue = req->queue;
struct ib_device *ibdev = queue->device->dev;
+ if (req->use_sig_mr)
+ nvme_rdma_check_pi_status(req);
+
nvme_rdma_unmap_data(queue, rq);
ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
DMA_TO_DEVICE);
@@ -1926,7 +2193,7 @@ out_fail:
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.name = "rdma",
.module = THIS_MODULE,
- .flags = NVME_F_FABRICS,
+ .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED,
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f..7c7c1886642f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -60,6 +60,7 @@ struct nvme_tcp_request {
enum nvme_tcp_queue_flags {
NVME_TCP_Q_ALLOCATED = 0,
NVME_TCP_Q_LIVE = 1,
+ NVME_TCP_Q_POLLING = 2,
};
enum nvme_tcp_recv_state {
@@ -75,6 +76,7 @@ struct nvme_tcp_queue {
int io_cpu;
spinlock_t lock;
+ struct mutex send_mutex;
struct list_head send_list;
/* recv state */
@@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
static struct workqueue_struct *nvme_tcp_wq;
static struct blk_mq_ops nvme_tcp_mq_ops;
static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
{
@@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req,
}
}
-static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req)
+static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
+ bool sync)
{
struct nvme_tcp_queue *queue = req->queue;
+ bool empty;
spin_lock(&queue->lock);
+ empty = list_empty(&queue->send_list) && !queue->request;
list_add_tail(&req->entry, &queue->send_list);
spin_unlock(&queue->lock);
- queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ /*
+ * if we're the first on the send_list and we can try to send
+ * directly, otherwise queue io_work. Also, only do that if we
+ * are on the same cpu, so we don't introduce contention.
+ */
+ if (queue->io_cpu == smp_processor_id() &&
+ sync && empty && mutex_trylock(&queue->send_mutex)) {
+ nvme_tcp_try_send(queue);
+ mutex_unlock(&queue->send_mutex);
+ } else {
+ queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
+ }
}
static inline struct nvme_tcp_request *
@@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue,
req->state = NVME_TCP_SEND_H2C_PDU;
req->offset = 0;
- nvme_tcp_queue_request(req);
+ nvme_tcp_queue_request(req, false);
return 0;
}
@@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk)
{
struct nvme_tcp_queue *queue;
- read_lock(&sk->sk_callback_lock);
+ read_lock_bh(&sk->sk_callback_lock);
queue = sk->sk_user_data;
- if (likely(queue && queue->rd_enabled))
+ if (likely(queue && queue->rd_enabled) &&
+ !test_bit(NVME_TCP_Q_POLLING, &queue->flags))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
- read_unlock(&sk->sk_callback_lock);
+ read_unlock_bh(&sk->sk_callback_lock);
}
static void nvme_tcp_write_space(struct sock *sk)
@@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req)
if (last && !queue->data_digest)
flags |= MSG_EOR;
else
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
/* can't zcopy slab pages */
if (unlikely(PageSlab(page))) {
@@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req)
struct nvme_tcp_queue *queue = req->queue;
struct nvme_tcp_cmd_pdu *pdu = req->pdu;
bool inline_data = nvme_tcp_has_inline_data(req);
- int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR);
u8 hdgst = nvme_tcp_hdgst_len(queue);
int len = sizeof(*pdu) + hdgst - req->offset;
+ int flags = MSG_DONTWAIT;
int ret;
+ if (inline_data)
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ else
+ flags |= MSG_EOR;
+
if (queue->hdr_digest && !req->offset)
nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu));
@@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req)
ret = kernel_sendpage(queue->sock, virt_to_page(pdu),
offset_in_page(pdu) + req->offset, len,
- MSG_DONTWAIT | MSG_MORE);
+ MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (unlikely(ret <= 0))
return ret;
@@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w)
bool pending = false;
int result;
- result = nvme_tcp_try_send(queue);
- if (result > 0)
- pending = true;
- else if (unlikely(result < 0))
- break;
+ if (mutex_trylock(&queue->send_mutex)) {
+ result = nvme_tcp_try_send(queue);
+ mutex_unlock(&queue->send_mutex);
+ if (result > 0)
+ pending = true;
+ else if (unlikely(result < 0))
+ break;
+ }
result = nvme_tcp_try_recv(queue);
if (result > 0)
@@ -1319,6 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
queue->ctrl = ctrl;
INIT_LIST_HEAD(&queue->send_list);
spin_lock_init(&queue->lock);
+ mutex_init(&queue->send_mutex);
INIT_WORK(&queue->io_work, nvme_tcp_io_work);
queue->queue_size = queue_size;
@@ -1543,6 +1570,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
set->reserved_tags = 2; /* connect + keep-alive */
set->numa_node = NUMA_NO_NODE;
+ set->flags = BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = 1;
@@ -1554,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
set->queue_depth = nctrl->sqsize + 1;
set->reserved_tags = 1; /* fabric connect */
set->numa_node = NUMA_NO_NODE;
- set->flags = BLK_MQ_F_SHOULD_MERGE;
+ set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
set->cmd_size = sizeof(struct nvme_tcp_request);
set->driver_data = ctrl;
set->nr_hw_queues = nctrl->queue_count - 1;
@@ -2113,7 +2141,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg)
ctrl->async_req.curr_bio = NULL;
ctrl->async_req.data_len = 0;
- nvme_tcp_queue_request(&ctrl->async_req);
+ nvme_tcp_queue_request(&ctrl->async_req, true);
}
static enum blk_eh_timer_return
@@ -2244,7 +2272,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
blk_mq_start_request(rq);
- nvme_tcp_queue_request(req);
+ nvme_tcp_queue_request(req, true);
return BLK_STS_OK;
}
@@ -2302,9 +2330,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags))
return 0;
+ set_bit(NVME_TCP_Q_POLLING, &queue->flags);
if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue))
sk_busy_loop(sk, true);
nvme_tcp_try_recv(queue);
+ clear_bit(NVME_TCP_Q_POLLING, &queue->flags);
return queue->nr_cqe;
}
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index d7f48c0fb311..4474952d64c6 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -4,6 +4,7 @@ config NVME_TARGET
tristate "NVMe Target support"
depends on BLOCK
depends on CONFIGFS_FS
+ select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY
select SGL_ALLOC
help
This enabled target side support for the NVMe protocol, that is
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 9d6f75cfa77c..1db8c0498668 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -295,7 +295,7 @@ out:
static void nvmet_execute_get_log_page(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd)))
+ if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd)))
return;
switch (req->cmd->get_log_page.lid) {
@@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvme_id_ctrl *id;
+ u32 cmd_capsule_size;
u16 status = 0;
id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn));
- /* Max command capsule size is sqe + single page of in-capsule data */
- id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) +
- req->port->inline_data_size) / 16);
+ /*
+ * Max command capsule size is sqe + in-capsule data size.
+ * Disable in-capsule data for Metadata capable controllers.
+ */
+ cmd_capsule_size = sizeof(struct nvme_command);
+ if (!ctrl->pi_support)
+ cmd_capsule_size += req->port->inline_data_size;
+ id->ioccsz = cpu_to_le32(cmd_capsule_size / 16);
+
/* Max response capsule size is cqe */
id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16);
@@ -465,6 +472,7 @@ out:
static void nvmet_execute_identify_ns(struct nvmet_req *req)
{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
struct nvmet_ns *ns;
struct nvme_id_ns *id;
u16 status = 0;
@@ -482,10 +490,12 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
}
/* return an all zeroed buffer if we can't find an active namespace */
- ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid);
+ ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid);
if (!ns)
goto done;
+ nvmet_ns_revalidate(ns);
+
/*
* nuse = ncap = nsze isn't always true, but we have no way to find
* that out from the underlying device.
@@ -521,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
id->lbaf[0].ds = ns->blksize_shift;
+ if (ctrl->pi_support && nvmet_ns_has_pi(ns)) {
+ id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST |
+ NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 |
+ NVME_NS_DPC_PI_TYPE3;
+ id->mc = NVME_MC_EXTENDED_LBA;
+ id->dps = ns->pi_type;
+ id->flbas = NVME_NS_FLBAS_META_EXT;
+ id->lbaf[0].ms = cpu_to_le16(ns->metadata_size);
+ }
+
if (ns->readonly)
id->nsattr |= (1 << 0);
nvmet_put_namespace(ns);
@@ -625,7 +645,7 @@ out:
static void nvmet_execute_identify(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
switch (req->cmd->identify.cns) {
@@ -654,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req)
*/
static void nvmet_execute_abort(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
nvmet_set_result(req, 1);
nvmet_req_complete(req, 0);
@@ -743,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
u16 nsqr;
u16 ncqr;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -815,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 status = 0;
- if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10)))
+ if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10)))
return;
switch (cdw10 & 0xff) {
@@ -882,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
mutex_lock(&ctrl->lock);
@@ -901,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
pr_debug("ctrl %d update keep-alive timer for %d secs\n",
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index 58cabd7b6fc5..419e0d4ce79b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -20,61 +20,71 @@ static const struct config_item_type nvmet_subsys_type;
static LIST_HEAD(nvmet_ports_list);
struct list_head *nvmet_ports = &nvmet_ports_list;
-static const struct nvmet_transport_name {
+struct nvmet_type_name_map {
u8 type;
const char *name;
-} nvmet_transport_names[] = {
+};
+
+static struct nvmet_type_name_map nvmet_transport[] = {
{ NVMF_TRTYPE_RDMA, "rdma" },
{ NVMF_TRTYPE_FC, "fc" },
{ NVMF_TRTYPE_TCP, "tcp" },
{ NVMF_TRTYPE_LOOP, "loop" },
};
+static const struct nvmet_type_name_map nvmet_addr_family[] = {
+ { NVMF_ADDR_FAMILY_PCI, "pcie" },
+ { NVMF_ADDR_FAMILY_IP4, "ipv4" },
+ { NVMF_ADDR_FAMILY_IP6, "ipv6" },
+ { NVMF_ADDR_FAMILY_IB, "ib" },
+ { NVMF_ADDR_FAMILY_FC, "fc" },
+ { NVMF_ADDR_FAMILY_LOOP, "loop" },
+};
+
+static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller)
+{
+ if (p->enabled)
+ pr_err("Disable port '%u' before changing attribute in %s\n",
+ le16_to_cpu(p->disc_addr.portid), caller);
+ return p->enabled;
+}
+
/*
* nvmet_port Generic ConfigFS definitions.
* Used in any place in the ConfigFS tree that refers to an address.
*/
-static ssize_t nvmet_addr_adrfam_show(struct config_item *item,
- char *page)
+static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.adrfam) {
- case NVMF_ADDR_FAMILY_IP4:
- return sprintf(page, "ipv4\n");
- case NVMF_ADDR_FAMILY_IP6:
- return sprintf(page, "ipv6\n");
- case NVMF_ADDR_FAMILY_IB:
- return sprintf(page, "ib\n");
- case NVMF_ADDR_FAMILY_FC:
- return sprintf(page, "fc\n");
- default:
- return sprintf(page, "\n");
+ u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam;
+ int i;
+
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (nvmet_addr_family[i].type == adrfam)
+ return sprintf(page, "%s\n", nvmet_addr_family[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_adrfam_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_port *port = to_nvmet_port(item);
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- if (sysfs_streq(page, "ipv4")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4;
- } else if (sysfs_streq(page, "ipv6")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6;
- } else if (sysfs_streq(page, "ib")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB;
- } else if (sysfs_streq(page, "fc")) {
- port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC;
- } else {
- pr_err("Invalid value '%s' for adrfam\n", page);
- return -EINVAL;
+ for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) {
+ if (sysfs_streq(page, nvmet_addr_family[i].name))
+ goto found;
}
+ pr_err("Invalid value '%s' for adrfam\n", page);
+ return -EINVAL;
+
+found:
+ port->disc_addr.adrfam = nvmet_addr_family[i].type;
return count;
}
@@ -100,11 +110,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item,
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
+
port->disc_addr.portid = cpu_to_le16(portid);
return count;
}
@@ -130,11 +138,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1)
return -EINVAL;
@@ -143,20 +148,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, addr_traddr);
-static ssize_t nvmet_addr_treq_show(struct config_item *item,
- char *page)
+static const struct nvmet_type_name_map nvmet_addr_treq[] = {
+ { NVMF_TREQ_NOT_SPECIFIED, "not specified" },
+ { NVMF_TREQ_REQUIRED, "required" },
+ { NVMF_TREQ_NOT_REQUIRED, "not required" },
+};
+
+static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page)
{
- switch (to_nvmet_port(item)->disc_addr.treq &
- NVME_TREQ_SECURE_CHANNEL_MASK) {
- case NVMF_TREQ_NOT_SPECIFIED:
- return sprintf(page, "not specified\n");
- case NVMF_TREQ_REQUIRED:
- return sprintf(page, "required\n");
- case NVMF_TREQ_NOT_REQUIRED:
- return sprintf(page, "not required\n");
- default:
- return sprintf(page, "\n");
+ u8 treq = to_nvmet_port(item)->disc_addr.treq &
+ NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (treq == nvmet_addr_treq[i].type)
+ return sprintf(page, "%s\n", nvmet_addr_treq[i].name);
}
+
+ return sprintf(page, "\n");
}
static ssize_t nvmet_addr_treq_store(struct config_item *item,
@@ -164,25 +173,22 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item,
{
struct nvmet_port *port = to_nvmet_port(item);
u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK;
+ int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- if (sysfs_streq(page, "not specified")) {
- treq |= NVMF_TREQ_NOT_SPECIFIED;
- } else if (sysfs_streq(page, "required")) {
- treq |= NVMF_TREQ_REQUIRED;
- } else if (sysfs_streq(page, "not required")) {
- treq |= NVMF_TREQ_NOT_REQUIRED;
- } else {
- pr_err("Invalid value '%s' for treq\n", page);
- return -EINVAL;
+ for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) {
+ if (sysfs_streq(page, nvmet_addr_treq[i].name))
+ goto found;
}
- port->disc_addr.treq = treq;
+ pr_err("Invalid value '%s' for treq\n", page);
+ return -EINVAL;
+
+found:
+ treq |= nvmet_addr_treq[i].type;
+ port->disc_addr.treq = treq;
return count;
}
@@ -206,11 +212,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item,
pr_err("Invalid value '%s' for trsvcid\n", page);
return -EINVAL;
}
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1)
return -EINVAL;
@@ -233,11 +236,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int ret;
- if (port->enabled) {
- pr_err("Cannot modify inline_data_size while port enabled\n");
- pr_err("Disable the port before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
ret = kstrtoint(page, 0, &port->inline_data_size);
if (ret) {
pr_err("Invalid value '%s' for inline_data_size\n", page);
@@ -248,16 +248,45 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_, param_inline_data_size);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_param_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+
+ return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable);
+}
+
+static ssize_t nvmet_param_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_port *port = to_nvmet_port(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (port->enabled) {
+ pr_err("Disable port before setting pi_enable value.\n");
+ return -EACCES;
+ }
+
+ port->pi_enable = val;
+ return count;
+}
+
+CONFIGFS_ATTR(nvmet_, param_pi_enable);
+#endif
+
static ssize_t nvmet_addr_trtype_show(struct config_item *item,
char *page)
{
struct nvmet_port *port = to_nvmet_port(item);
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (port->disc_addr.trtype != nvmet_transport_names[i].type)
- continue;
- return sprintf(page, "%s\n", nvmet_transport_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (port->disc_addr.trtype == nvmet_transport[i].type)
+ return sprintf(page, "%s\n", nvmet_transport[i].name);
}
return sprintf(page, "\n");
@@ -276,22 +305,20 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item,
struct nvmet_port *port = to_nvmet_port(item);
int i;
- if (port->enabled) {
- pr_err("Cannot modify address while enabled\n");
- pr_err("Disable the address before modifying\n");
+ if (nvmet_is_port_enabled(port, __func__))
return -EACCES;
- }
- for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) {
- if (sysfs_streq(page, nvmet_transport_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) {
+ if (sysfs_streq(page, nvmet_transport[i].name))
goto found;
}
pr_err("Invalid value '%s' for trtype\n", page);
return -EINVAL;
+
found:
memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE);
- port->disc_addr.trtype = nvmet_transport_names[i].type;
+ port->disc_addr.trtype = nvmet_transport[i].type;
if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA)
nvmet_port_init_tsas_rdma(port);
return count;
@@ -327,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item,
kfree(ns->device_path);
ret = -ENOMEM;
- ns->device_path = kstrndup(page, len, GFP_KERNEL);
+ ns->device_path = kmemdup_nul(page, len, GFP_KERNEL);
if (!ns->device_path)
goto out_unlock;
@@ -543,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item,
CONFIGFS_ATTR(nvmet_ns_, buffered_io);
+static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_ns *ns = to_nvmet_ns(item);
+ bool val;
+
+ if (strtobool(page, &val))
+ return -EINVAL;
+
+ if (!val)
+ return -EINVAL;
+
+ mutex_lock(&ns->subsys->lock);
+ if (!ns->enabled) {
+ pr_err("enable ns before revalidate.\n");
+ mutex_unlock(&ns->subsys->lock);
+ return -EINVAL;
+ }
+ nvmet_ns_revalidate(ns);
+ mutex_unlock(&ns->subsys->lock);
+ return count;
+}
+
+CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size);
+
static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_device_path,
&nvmet_ns_attr_device_nguid,
@@ -550,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = {
&nvmet_ns_attr_ana_grpid,
&nvmet_ns_attr_enable,
&nvmet_ns_attr_buffered_io,
+ &nvmet_ns_attr_revalidate_size,
#ifdef CONFIG_PCI_P2PDMA
&nvmet_ns_attr_p2pmem,
#endif
@@ -963,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
return -EINVAL;
}
- new_model_number = kstrndup(page, len, GFP_KERNEL);
+ new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
if (!new_model_number)
return -ENOMEM;
@@ -987,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
}
CONFIGFS_ATTR(nvmet_subsys_, attr_model);
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item,
+ char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support);
+}
+
+static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item);
+ bool pi_enable;
+
+ if (strtobool(page, &pi_enable))
+ return -EINVAL;
+
+ subsys->pi_support = pi_enable;
+ return count;
+}
+CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable);
+#endif
+
static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_allow_any_host,
&nvmet_subsys_attr_attr_version,
@@ -994,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = {
&nvmet_subsys_attr_attr_cntlid_min,
&nvmet_subsys_attr_attr_cntlid_max,
&nvmet_subsys_attr_attr_model,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_subsys_attr_attr_pi_enable,
+#endif
NULL,
};
@@ -1149,10 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = {
.ct_group_ops = &nvmet_referral_group_ops,
};
-static struct {
- enum nvme_ana_state state;
- const char *name;
-} nvmet_ana_state_names[] = {
+static struct nvmet_type_name_map nvmet_ana_state[] = {
{ NVME_ANA_OPTIMIZED, "optimized" },
{ NVME_ANA_NONOPTIMIZED, "non-optimized" },
{ NVME_ANA_INACCESSIBLE, "inaccessible" },
@@ -1167,10 +1242,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item,
enum nvme_ana_state state = grp->port->ana_state[grp->grpid];
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (state != nvmet_ana_state_names[i].state)
- continue;
- return sprintf(page, "%s\n", nvmet_ana_state_names[i].name);
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (state == nvmet_ana_state[i].type)
+ return sprintf(page, "%s\n", nvmet_ana_state[i].name);
}
return sprintf(page, "\n");
@@ -1180,10 +1254,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
const char *page, size_t count)
{
struct nvmet_ana_group *grp = to_ana_group(item);
+ enum nvme_ana_state *ana_state = grp->port->ana_state;
int i;
- for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) {
- if (sysfs_streq(page, nvmet_ana_state_names[i].name))
+ for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) {
+ if (sysfs_streq(page, nvmet_ana_state[i].name))
goto found;
}
@@ -1192,10 +1267,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item,
found:
down_write(&nvmet_ana_sem);
- grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state;
+ ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type;
nvmet_ana_chgcnt++;
up_write(&nvmet_ana_sem);
-
nvmet_port_send_ana_event(grp->port);
return count;
}
@@ -1297,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = {
&nvmet_attr_addr_trsvcid,
&nvmet_attr_addr_trtype,
&nvmet_attr_param_inline_data_size,
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+ &nvmet_attr_param_pi_enable,
+#endif
NULL,
};
@@ -1346,6 +1423,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group,
port->inline_data_size = -1; /* < 0 == let the transport choose */
port->disc_addr.portid = cpu_to_le16(portid);
+ port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX;
port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW;
config_group_init_type_name(&port->group, name, &nvmet_port_type);
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b685f99d56a1..6392bcd30bd7 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
struct nvmet_async_event *aen;
struct nvmet_req *req;
- while (1) {
- mutex_lock(&ctrl->lock);
- aen = list_first_entry_or_null(&ctrl->async_events,
- struct nvmet_async_event, entry);
- if (!aen || !ctrl->nr_async_event_cmds) {
- mutex_unlock(&ctrl->lock);
- break;
- }
-
+ mutex_lock(&ctrl->lock);
+ while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) {
+ aen = list_first_entry(&ctrl->async_events,
+ struct nvmet_async_event, entry);
req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
if (status == 0)
nvmet_set_result(req, nvmet_async_event_result(aen));
@@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
kfree(aen);
mutex_unlock(&ctrl->lock);
+ trace_nvmet_async_event(ctrl, req->cqe->result.u32);
nvmet_req_complete(req, status);
+ mutex_lock(&ctrl->lock);
}
+ mutex_unlock(&ctrl->lock);
}
static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
{
- struct nvmet_req *req;
+ struct nvmet_async_event *aen, *tmp;
mutex_lock(&ctrl->lock);
- while (ctrl->nr_async_event_cmds) {
- req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
- mutex_unlock(&ctrl->lock);
- nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
- mutex_lock(&ctrl->lock);
+ list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) {
+ list_del(&aen->entry);
+ kfree(aen);
}
mutex_unlock(&ctrl->lock);
}
@@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port)
if (!try_module_get(ops->owner))
return -EINVAL;
- ret = ops->add_port(port);
- if (ret) {
- module_put(ops->owner);
- return ret;
+ /*
+ * If the user requested PI support and the transport isn't pi capable,
+ * don't enable the port.
+ */
+ if (port->pi_enable && !ops->metadata_support) {
+ pr_err("T10-PI is not supported by transport type %d\n",
+ port->disc_addr.trtype);
+ ret = -EINVAL;
+ goto out_put;
}
+ ret = ops->add_port(port);
+ if (ret)
+ goto out_put;
+
/* If the transport didn't set inline_data_size, then disable it. */
if (port->inline_data_size < 0)
port->inline_data_size = 0;
@@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port)
port->enabled = true;
port->tr_ops = ops;
return 0;
+
+out_put:
+ module_put(ops->owner);
+ return ret;
}
void nvmet_disable_port(struct nvmet_port *port)
@@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
ns->nsid);
}
+void nvmet_ns_revalidate(struct nvmet_ns *ns)
+{
+ loff_t oldsize = ns->size;
+
+ if (ns->bdev)
+ nvmet_bdev_ns_revalidate(ns);
+ else
+ nvmet_file_ns_revalidate(ns);
+
+ if (oldsize != ns->size)
+ nvmet_ns_changed(ns->subsys, ns->nsid);
+}
+
int nvmet_ns_enable(struct nvmet_ns *ns)
{
struct nvmet_subsys *subsys = ns->subsys;
@@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
* If this is the admin queue, complete all AERs so that our
* queue doesn't have outstanding requests on it.
*/
- if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
+ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
nvmet_async_events_process(ctrl, status);
- nvmet_async_events_free(ctrl);
- }
percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
wait_for_completion(&sq->confirm_done);
wait_for_completion(&sq->free_done);
@@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
req->sq = sq;
req->ops = ops;
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
req->transfer_len = 0;
+ req->metadata_len = 0;
req->cqe->status = 0;
req->cqe->sq_head = 0;
req->ns = NULL;
@@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req)
}
EXPORT_SYMBOL_GPL(nvmet_req_uninit);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len)
{
- if (unlikely(data_len != req->transfer_len)) {
+ if (unlikely(len != req->transfer_len)) {
req->error_loc = offsetof(struct nvme_common_command, dptr);
nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
return false;
@@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len)
return true;
}
-EXPORT_SYMBOL_GPL(nvmet_check_data_len);
+EXPORT_SYMBOL_GPL(nvmet_check_transfer_len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
{
@@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len)
return true;
}
-int nvmet_req_alloc_sgl(struct nvmet_req *req)
+static unsigned int nvmet_data_transfer_len(struct nvmet_req *req)
{
- struct pci_dev *p2p_dev = NULL;
+ return req->transfer_len - req->metadata_len;
+}
- if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
- if (req->sq->ctrl && req->ns)
- p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
- req->ns->nsid);
+static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req)
+{
+ req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt,
+ nvmet_data_transfer_len(req));
+ if (!req->sg)
+ goto out_err;
- req->p2p_dev = NULL;
- if (req->sq->qid && p2p_dev) {
- req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
- req->transfer_len);
- if (req->sg) {
- req->p2p_dev = p2p_dev;
- return 0;
- }
- }
+ if (req->metadata_len) {
+ req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev,
+ &req->metadata_sg_cnt, req->metadata_len);
+ if (!req->metadata_sg)
+ goto out_free_sg;
+ }
+ return 0;
+out_free_sg:
+ pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
+out_err:
+ return -ENOMEM;
+}
- /*
- * If no P2P memory was available we fallback to using
- * regular memory
- */
+static bool nvmet_req_find_p2p_dev(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_PCI_P2PDMA))
+ return false;
+
+ if (req->sq->ctrl && req->sq->qid && req->ns) {
+ req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
+ req->ns->nsid);
+ if (req->p2p_dev)
+ return true;
}
- req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
+ req->p2p_dev = NULL;
+ return false;
+}
+
+int nvmet_req_alloc_sgls(struct nvmet_req *req)
+{
+ if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req))
+ return 0;
+
+ req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL,
+ &req->sg_cnt);
if (unlikely(!req->sg))
- return -ENOMEM;
+ goto out;
+
+ if (req->metadata_len) {
+ req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL,
+ &req->metadata_sg_cnt);
+ if (unlikely(!req->metadata_sg))
+ goto out_free;
+ }
return 0;
+out_free:
+ sgl_free(req->sg);
+out:
+ return -ENOMEM;
}
-EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls);
-void nvmet_req_free_sgl(struct nvmet_req *req)
+void nvmet_req_free_sgls(struct nvmet_req *req)
{
- if (req->p2p_dev)
+ if (req->p2p_dev) {
pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
- else
+ if (req->metadata_sg)
+ pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg);
+ } else {
sgl_free(req->sg);
+ if (req->metadata_sg)
+ sgl_free(req->metadata_sg);
+ }
req->sg = NULL;
+ req->metadata_sg = NULL;
req->sg_cnt = 0;
+ req->metadata_sg_cnt = 0;
}
-EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
+EXPORT_SYMBOL_GPL(nvmet_req_free_sgls);
static inline bool nvmet_cc_en(u32 cc)
{
@@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref)
ida_simple_remove(&cntlid_ida, ctrl->cntlid);
+ nvmet_async_events_free(ctrl);
kfree(ctrl->sqs);
kfree(ctrl->cqs);
kfree(ctrl->changed_ns_list);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 0c2274b21e15..40cf0b6e6c9d 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req)
u16 status = 0;
void *buffer;
- if (!nvmet_check_data_len(req, data_len))
+ if (!nvmet_check_transfer_len(req, data_len))
return;
if (req->cmd->get_log_page.lid != NVME_LOG_DISC) {
@@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req)
const char model[] = "Linux";
u16 status = 0;
- if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE))
+ if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE))
return;
if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) {
@@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
@@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req)
u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
u16 stat = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
switch (cdw10 & 0xff) {
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index feef15c38ec9..42bd12b8bf00 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req)
u64 val = le64_to_cpu(req->cmd->prop_set.value);
u16 status = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_set.attrib & 1) {
@@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
u16 status = 0;
u64 val = 0;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
if (req->cmd->prop_get.attrib & 1) {
@@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
struct nvmet_ctrl *ctrl = NULL;
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
@@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
+ ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support;
+
uuid_copy(&ctrl->hostid, &d->hostid);
status = nvmet_install_queue(ctrl, req);
@@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
goto out;
}
- pr_info("creating controller %d for subsystem %s for NQN %s.\n",
- ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
+ pr_info("creating controller %d for subsystem %s for NQN %s%s.\n",
+ ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn,
+ ctrl->pi_support ? " T10-PI is enabled" : "");
req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
out:
@@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
u16 qid = le16_to_cpu(c->qid);
u16 status = 0;
- if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data)))
+ if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data)))
return;
d = kmalloc(sizeof(*d), GFP_KERNEL);
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index a8ceb7721640..27fd3b5aa621 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -14,6 +14,7 @@
#include "nvmet.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>
+#include "../host/fc.h"
/* *************************** Data Structures/Defines ****************** */
@@ -21,23 +22,21 @@
#define NVMET_LS_CTX_COUNT 256
-/* for this implementation, assume small single frame rqst/rsp */
-#define NVME_FC_MAX_LS_BUFFER_SIZE 2048
-
struct nvmet_fc_tgtport;
struct nvmet_fc_tgt_assoc;
-struct nvmet_fc_ls_iod {
- struct nvmefc_tgt_ls_req *lsreq;
+struct nvmet_fc_ls_iod { /* for an LS RQST RCV */
+ struct nvmefc_ls_rsp *lsrsp;
struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */
- struct list_head ls_list; /* tgtport->ls_list */
+ struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */
struct nvmet_fc_tgtport *tgtport;
struct nvmet_fc_tgt_assoc *assoc;
+ void *hosthandle;
- u8 *rqstbuf;
- u8 *rspbuf;
+ union nvmefc_ls_requests *rqstbuf;
+ union nvmefc_ls_responses *rspbuf;
u16 rqstdatalen;
dma_addr_t rspdma;
@@ -46,6 +45,18 @@ struct nvmet_fc_ls_iod {
struct work_struct work;
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */
+ struct nvmefc_ls_req ls_req;
+
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+
+ int ls_error;
+ struct list_head lsreq_list; /* tgtport->ls_req_list */
+ bool req_queued;
+};
+
+
/* desired maximum for a single sequence - if sg list allows it */
#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024)
@@ -83,7 +94,6 @@ struct nvmet_fc_fcp_iod {
};
struct nvmet_fc_tgtport {
-
struct nvmet_fc_target_port fc_target_port;
struct list_head tgt_list; /* nvmet_fc_target_list */
@@ -92,9 +102,11 @@ struct nvmet_fc_tgtport {
struct nvmet_fc_ls_iod *iod;
spinlock_t lock;
- struct list_head ls_list;
+ struct list_head ls_rcv_list;
+ struct list_head ls_req_list;
struct list_head ls_busylist;
struct list_head assoc_list;
+ struct list_head host_list;
struct ida assoc_cnt;
struct nvmet_fc_port_entry *pe;
struct kref ref;
@@ -136,14 +148,26 @@ struct nvmet_fc_tgt_queue {
struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */
} __aligned(sizeof(unsigned long long));
+struct nvmet_fc_hostport {
+ struct nvmet_fc_tgtport *tgtport;
+ void *hosthandle;
+ struct list_head host_list;
+ struct kref ref;
+ u8 invalid;
+};
+
struct nvmet_fc_tgt_assoc {
u64 association_id;
u32 a_id;
+ atomic_t terminating;
struct nvmet_fc_tgtport *tgtport;
+ struct nvmet_fc_hostport *hostport;
+ struct nvmet_fc_ls_iod *rcv_disconn;
struct list_head a_list;
struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1];
struct kref ref;
struct work_struct del_work;
+ atomic_t del_work_active;
};
@@ -227,6 +251,8 @@ static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport);
static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_fcp_iod *fod);
static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc);
+static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_iod *iod);
/* *********************** FC-NVME DMA Handling **************************** */
@@ -318,6 +344,188 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
}
+/* ********************** FC-NVME LS XMT Handling ************************* */
+
+
+static void
+__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop)
+{
+ struct nvmet_fc_tgtport *tgtport = lsop->tgtport;
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ if (!lsop->req_queued) {
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ return;
+ }
+
+ list_del(&lsop->lsreq_list);
+
+ lsop->req_queued = false;
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+static int
+__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ struct nvmefc_ls_req *lsreq = &lsop->ls_req;
+ unsigned long flags;
+ int ret = 0;
+
+ if (!tgtport->ops->ls_req)
+ return -EOPNOTSUPP;
+
+ if (!nvmet_fc_tgtport_get(tgtport))
+ return -ESHUTDOWN;
+
+ lsreq->done = done;
+ lsop->req_queued = false;
+ INIT_LIST_HEAD(&lsop->lsreq_list);
+
+ lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr,
+ lsreq->rqstlen + lsreq->rsplen,
+ DMA_BIDIRECTIONAL);
+ if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) {
+ ret = -EFAULT;
+ goto out_puttgtport;
+ }
+ lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+
+ list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list);
+
+ lsop->req_queued = true;
+
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle,
+ lsreq);
+ if (ret)
+ goto out_unlink;
+
+ return 0;
+
+out_unlink:
+ lsop->ls_error = ret;
+ spin_lock_irqsave(&tgtport->lock, flags);
+ lsop->req_queued = false;
+ list_del(&lsop->lsreq_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma,
+ (lsreq->rqstlen + lsreq->rsplen),
+ DMA_BIDIRECTIONAL);
+out_puttgtport:
+ nvmet_fc_tgtport_put(tgtport);
+
+ return ret;
+}
+
+static int
+nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport,
+ struct nvmet_fc_ls_req_op *lsop,
+ void (*done)(struct nvmefc_ls_req *req, int status))
+{
+ /* don't wait for completion */
+
+ return __nvmet_fc_send_ls_req(tgtport, lsop, done);
+}
+
+static void
+nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
+{
+ struct nvmet_fc_ls_req_op *lsop =
+ container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req);
+
+ __nvmet_fc_finish_ls_req(lsop);
+
+ /* fc-nvme target doesn't care about success or failure of cmd */
+
+ kfree(lsop);
+}
+
+/*
+ * This routine sends a FC-NVME LS to disconnect (aka terminate)
+ * the FC-NVME Association. Terminating the association also
+ * terminates the FC-NVME connections (per queue, both admin and io
+ * queues) that are part of the association. E.g. things are torn
+ * down, and the related FC-NVME Association ID and Connection IDs
+ * become invalid.
+ *
+ * The behavior of the fc-nvme target is such that it's
+ * understanding of the association and connections will implicitly
+ * be torn down. The action is implicit as it may be due to a loss of
+ * connectivity with the fc-nvme host, so the target may never get a
+ * response even if it tried. As such, the action of this routine
+ * is to asynchronously send the LS, ignore any results of the LS, and
+ * continue on with terminating the association. If the fc-nvme host
+ * is present and receives the LS, it too can tear down.
+ */
+static void
+nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc)
+{
+ struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst;
+ struct fcnvme_ls_disconnect_assoc_acc *discon_acc;
+ struct nvmet_fc_ls_req_op *lsop;
+ struct nvmefc_ls_req *lsreq;
+ int ret;
+
+ /*
+ * If ls_req is NULL or no hosthandle, it's an older lldd and no
+ * message is normal. Otherwise, send unless the hostport has
+ * already been invalidated by the lldd.
+ */
+ if (!tgtport->ops->ls_req || !assoc->hostport ||
+ assoc->hostport->invalid)
+ return;
+
+ lsop = kzalloc((sizeof(*lsop) +
+ sizeof(*discon_rqst) + sizeof(*discon_acc) +
+ tgtport->ops->lsrqst_priv_sz), GFP_KERNEL);
+ if (!lsop) {
+ dev_info(tgtport->dev,
+ "{%d:%d} send Disconnect Association failed: ENOMEM\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ return;
+ }
+
+ discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1];
+ discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1];
+ lsreq = &lsop->ls_req;
+ if (tgtport->ops->lsrqst_priv_sz)
+ lsreq->private = (void *)&discon_acc[1];
+ else
+ lsreq->private = NULL;
+
+ lsop->tgtport = tgtport;
+ lsop->hosthandle = assoc->hostport->hosthandle;
+
+ nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc,
+ assoc->association_id);
+
+ ret = nvmet_fc_send_ls_req_async(tgtport, lsop,
+ nvmet_fc_disconnect_assoc_done);
+ if (ret) {
+ dev_info(tgtport->dev,
+ "{%d:%d} XMT Disconnect Association failed: %d\n",
+ tgtport->fc_target_port.port_num, assoc->a_id, ret);
+ kfree(lsop);
+ }
+}
+
+
/* *********************** FC-NVME Port Management ************************ */
@@ -337,17 +545,18 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work);
iod->tgtport = tgtport;
- list_add_tail(&iod->ls_list, &tgtport->ls_list);
+ list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
- iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE,
- GFP_KERNEL);
+ iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) +
+ sizeof(union nvmefc_ls_responses),
+ GFP_KERNEL);
if (!iod->rqstbuf)
goto out_fail;
- iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE;
+ iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1];
iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE,
+ sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
if (fc_dma_mapping_error(tgtport->dev, iod->rspdma))
goto out_fail;
@@ -357,12 +566,12 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
out_fail:
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
for (iod--, i--; i >= 0; iod--, i--) {
fc_dma_unmap_single(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(iod);
@@ -378,10 +587,10 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport)
for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) {
fc_dma_unmap_single(tgtport->dev,
- iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE,
+ iod->rspdma, sizeof(*iod->rspbuf),
DMA_TO_DEVICE);
kfree(iod->rqstbuf);
- list_del(&iod->ls_list);
+ list_del(&iod->ls_rcv_list);
}
kfree(tgtport->iod);
}
@@ -393,10 +602,10 @@ nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport)
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- iod = list_first_entry_or_null(&tgtport->ls_list,
- struct nvmet_fc_ls_iod, ls_list);
+ iod = list_first_entry_or_null(&tgtport->ls_rcv_list,
+ struct nvmet_fc_ls_iod, ls_rcv_list);
if (iod)
- list_move_tail(&iod->ls_list, &tgtport->ls_busylist);
+ list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist);
spin_unlock_irqrestore(&tgtport->lock, flags);
return iod;
}
@@ -409,7 +618,7 @@ nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport,
unsigned long flags;
spin_lock_irqsave(&tgtport->lock, flags);
- list_move(&iod->ls_list, &tgtport->ls_list);
+ list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list);
spin_unlock_irqrestore(&tgtport->lock, flags);
}
@@ -678,31 +887,33 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
struct nvmet_fc_fcp_iod *fod = queue->fod;
struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr;
unsigned long flags;
- int i, writedataactive;
+ int i;
bool disconnect;
disconnect = atomic_xchg(&queue->connected, 0);
+ /* if not connected, nothing to do */
+ if (!disconnect)
+ return;
+
spin_lock_irqsave(&queue->qlock, flags);
/* abort outstanding io's */
for (i = 0; i < queue->sqsize; fod++, i++) {
if (fod->active) {
spin_lock(&fod->flock);
fod->abort = true;
- writedataactive = fod->writedataactive;
- spin_unlock(&fod->flock);
/*
* only call lldd abort routine if waiting for
* writedata. other outstanding ops should finish
* on their own.
*/
- if (writedataactive) {
- spin_lock(&fod->flock);
+ if (fod->writedataactive) {
fod->aborted = true;
spin_unlock(&fod->flock);
tgtport->ops->fcp_abort(
&tgtport->fc_target_port, fod->fcpreq);
- }
+ } else
+ spin_unlock(&fod->flock);
}
}
@@ -742,8 +953,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
flush_workqueue(queue->work_q);
- if (disconnect)
- nvmet_sq_destroy(&queue->nvme_sq);
+ nvmet_sq_destroy(&queue->nvme_sq);
nvmet_fc_tgt_q_put(queue);
}
@@ -778,17 +988,114 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport,
}
static void
+nvmet_fc_hostport_free(struct kref *ref)
+{
+ struct nvmet_fc_hostport *hostport =
+ container_of(ref, struct nvmet_fc_hostport, ref);
+ struct nvmet_fc_tgtport *tgtport = hostport->tgtport;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_del(&hostport->host_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ if (tgtport->ops->host_release && hostport->invalid)
+ tgtport->ops->host_release(hostport->hosthandle);
+ kfree(hostport);
+ nvmet_fc_tgtport_put(tgtport);
+}
+
+static void
+nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport)
+{
+ kref_put(&hostport->ref, nvmet_fc_hostport_free);
+}
+
+static int
+nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport)
+{
+ return kref_get_unless_zero(&hostport->ref);
+}
+
+static void
+nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport)
+{
+ /* if LLDD not implemented, leave as NULL */
+ if (!hostport->hosthandle)
+ return;
+
+ nvmet_fc_hostport_put(hostport);
+}
+
+static struct nvmet_fc_hostport *
+nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
+{
+ struct nvmet_fc_hostport *newhost, *host, *match = NULL;
+ unsigned long flags;
+
+ /* if LLDD not implemented, leave as NULL */
+ if (!hosthandle)
+ return NULL;
+
+ /* take reference for what will be the newly allocated hostport */
+ if (!nvmet_fc_tgtport_get(tgtport))
+ return ERR_PTR(-EINVAL);
+
+ newhost = kzalloc(sizeof(*newhost), GFP_KERNEL);
+ if (!newhost) {
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry(host, &tgtport->host_list, host_list) {
+ if (host->hosthandle == hosthandle && !host->invalid) {
+ if (nvmet_fc_hostport_get(host)) {
+ match = host;
+ break;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* no allocation - release reference */
+ nvmet_fc_tgtport_put(tgtport);
+ return (match) ? match : ERR_PTR(-ENOMEM);
+ }
+
+ newhost->tgtport = tgtport;
+ newhost->hosthandle = hosthandle;
+ INIT_LIST_HEAD(&newhost->host_list);
+ kref_init(&newhost->ref);
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry(host, &tgtport->host_list, host_list) {
+ if (host->hosthandle == hosthandle && !host->invalid) {
+ if (nvmet_fc_hostport_get(host)) {
+ match = host;
+ break;
+ }
+ }
+ }
+ if (match) {
+ kfree(newhost);
+ newhost = NULL;
+ /* releasing allocation - release reference */
+ nvmet_fc_tgtport_put(tgtport);
+ } else
+ list_add_tail(&newhost->host_list, &tgtport->host_list);
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ return (match) ? match : newhost;
+}
+
+static void
nvmet_fc_delete_assoc(struct work_struct *work)
{
struct nvmet_fc_tgt_assoc *assoc =
container_of(work, struct nvmet_fc_tgt_assoc, del_work);
nvmet_fc_delete_target_assoc(assoc);
+ atomic_set(&assoc->del_work_active, 0);
nvmet_fc_tgt_a_put(assoc);
}
static struct nvmet_fc_tgt_assoc *
-nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
+nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle)
{
struct nvmet_fc_tgt_assoc *assoc, *tmpassoc;
unsigned long flags;
@@ -805,13 +1112,19 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
goto out_free_assoc;
if (!nvmet_fc_tgtport_get(tgtport))
- goto out_ida_put;
+ goto out_ida;
+
+ assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle);
+ if (IS_ERR(assoc->hostport))
+ goto out_put;
assoc->tgtport = tgtport;
assoc->a_id = idx;
INIT_LIST_HEAD(&assoc->a_list);
kref_init(&assoc->ref);
INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc);
+ atomic_set(&assoc->del_work_active, 0);
+ atomic_set(&assoc->terminating, 0);
while (needrandom) {
get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID);
@@ -819,11 +1132,12 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
spin_lock_irqsave(&tgtport->lock, flags);
needrandom = false;
- list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list)
+ list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) {
if (ran == tmpassoc->association_id) {
needrandom = true;
break;
}
+ }
if (!needrandom) {
assoc->association_id = ran;
list_add_tail(&assoc->a_list, &tgtport->assoc_list);
@@ -833,7 +1147,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport)
return assoc;
-out_ida_put:
+out_put:
+ nvmet_fc_tgtport_put(tgtport);
+out_ida:
ida_simple_remove(&tgtport->assoc_cnt, idx);
out_free_assoc:
kfree(assoc);
@@ -846,12 +1162,24 @@ nvmet_fc_target_assoc_free(struct kref *ref)
struct nvmet_fc_tgt_assoc *assoc =
container_of(ref, struct nvmet_fc_tgt_assoc, ref);
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
+ struct nvmet_fc_ls_iod *oldls;
unsigned long flags;
+ /* Send Disconnect now that all i/o has completed */
+ nvmet_fc_xmt_disconnect_assoc(assoc);
+
+ nvmet_fc_free_hostport(assoc->hostport);
spin_lock_irqsave(&tgtport->lock, flags);
list_del(&assoc->a_list);
+ oldls = assoc->rcv_disconn;
spin_unlock_irqrestore(&tgtport->lock, flags);
+ /* if pending Rcv Disconnect Association LS, send rsp now */
+ if (oldls)
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association freed\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
kfree(assoc);
nvmet_fc_tgtport_put(tgtport);
}
@@ -874,7 +1202,13 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
struct nvmet_fc_tgtport *tgtport = assoc->tgtport;
struct nvmet_fc_tgt_queue *queue;
unsigned long flags;
- int i;
+ int i, terminating;
+
+ terminating = atomic_xchg(&assoc->terminating, 1);
+
+ /* if already terminating, do nothing */
+ if (terminating)
+ return;
spin_lock_irqsave(&tgtport->lock, flags);
for (i = NVMET_NR_QUEUES; i >= 0; i--) {
@@ -890,6 +1224,10 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc)
}
spin_unlock_irqrestore(&tgtport->lock, flags);
+ dev_info(tgtport->dev,
+ "{%d:%d} Association deleted\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+
nvmet_fc_tgt_a_put(assoc);
}
@@ -1048,16 +1386,21 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo,
newrec->fc_target_port.node_name = pinfo->node_name;
newrec->fc_target_port.port_name = pinfo->port_name;
- newrec->fc_target_port.private = &newrec[1];
+ if (template->target_priv_sz)
+ newrec->fc_target_port.private = &newrec[1];
+ else
+ newrec->fc_target_port.private = NULL;
newrec->fc_target_port.port_id = pinfo->port_id;
newrec->fc_target_port.port_num = idx;
INIT_LIST_HEAD(&newrec->tgt_list);
newrec->dev = dev;
newrec->ops = template;
spin_lock_init(&newrec->lock);
- INIT_LIST_HEAD(&newrec->ls_list);
+ INIT_LIST_HEAD(&newrec->ls_rcv_list);
+ INIT_LIST_HEAD(&newrec->ls_req_list);
INIT_LIST_HEAD(&newrec->ls_busylist);
INIT_LIST_HEAD(&newrec->assoc_list);
+ INIT_LIST_HEAD(&newrec->host_list);
kref_init(&newrec->ref);
ida_init(&newrec->assoc_cnt);
newrec->max_sg_cnt = template->max_sgl_segments;
@@ -1134,17 +1477,90 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport)
{
struct nvmet_fc_tgt_assoc *assoc, *next;
unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&tgtport->lock, flags);
+ list_for_each_entry_safe(assoc, next,
+ &tgtport->assoc_list, a_list) {
+ if (!nvmet_fc_tgt_a_get(assoc))
+ continue;
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
+ nvmet_fc_tgt_a_put(assoc);
+ }
+ }
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+}
+
+/**
+ * nvmet_fc_invalidate_host - transport entry point called by an LLDD
+ * to remove references to a hosthandle for LS's.
+ *
+ * The nvmet-fc layer ensures that any references to the hosthandle
+ * on the targetport are forgotten (set to NULL). The LLDD will
+ * typically call this when a login with a remote host port has been
+ * lost, thus LS's for the remote host port are no longer possible.
+ *
+ * If an LS request is outstanding to the targetport/hosthandle (or
+ * issued concurrently with the call to invalidate the host), the
+ * LLDD is responsible for terminating/aborting the LS and completing
+ * the LS request. It is recommended that these terminations/aborts
+ * occur after calling to invalidate the host handle to avoid additional
+ * retries by the nvmet-fc transport. The nvmet-fc transport may
+ * continue to reference host handle while it cleans up outstanding
+ * NVME associations. The nvmet-fc transport will call the
+ * ops->host_release() callback to notify the LLDD that all references
+ * are complete and the related host handle can be recovered.
+ * Note: if there are no references, the callback may be called before
+ * the invalidate host call returns.
+ *
+ * @target_port: pointer to the (registered) target port that a prior
+ * LS was received on and which supplied the transport the
+ * hosthandle.
+ * @hosthandle: the handle (pointer) that represents the host port
+ * that no longer has connectivity and that LS's should
+ * no longer be directed to.
+ */
+void
+nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port,
+ void *hosthandle)
+{
+ struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
+ struct nvmet_fc_tgt_assoc *assoc, *next;
+ unsigned long flags;
+ bool noassoc = true;
+ int ret;
spin_lock_irqsave(&tgtport->lock, flags);
list_for_each_entry_safe(assoc, next,
&tgtport->assoc_list, a_list) {
+ if (!assoc->hostport ||
+ assoc->hostport->hosthandle != hosthandle)
+ continue;
if (!nvmet_fc_tgt_a_get(assoc))
continue;
- if (!schedule_work(&assoc->del_work))
+ assoc->hostport->invalid = 1;
+ noassoc = false;
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
+ }
}
spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ /* if there's nothing to wait for - call the callback */
+ if (noassoc && tgtport->ops->host_release)
+ tgtport->ops->host_release(hosthandle);
}
+EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host);
/*
* nvmet layer has called to terminate an association
@@ -1157,6 +1573,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
struct nvmet_fc_tgt_queue *queue;
unsigned long flags;
bool found_ctrl = false;
+ int ret;
/* this is a bit ugly, but don't want to make locks layered */
spin_lock_irqsave(&nvmet_fc_tgtlock, flags);
@@ -1180,8 +1597,14 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl)
nvmet_fc_tgtport_put(tgtport);
if (found_ctrl) {
- if (!schedule_work(&assoc->del_work))
+ ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1);
+ if (ret == 0) {
+ if (!schedule_work(&assoc->del_work))
+ nvmet_fc_tgt_a_put(assoc);
+ } else {
+ /* already deleting - release local reference */
nvmet_fc_tgt_a_put(assoc);
+ }
return;
}
@@ -1211,6 +1634,13 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
/* terminate any outstanding associations */
__nvmet_fc_free_assocs(tgtport);
+ /*
+ * should terminate LS's as well. However, LS's will be generated
+ * at the tail end of association termination, so they likely don't
+ * exist yet. And even if they did, it's worthwhile to just let
+ * them finish and targetport ref counting will clean things up.
+ */
+
nvmet_fc_tgtport_put(tgtport);
return 0;
@@ -1218,113 +1648,15 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port)
EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport);
-/* *********************** FC-NVME LS Handling **************************** */
-
-
-static void
-nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd)
-{
- struct fcnvme_ls_acc_hdr *acc = buf;
-
- acc->w0.ls_cmd = ls_cmd;
- acc->desc_list_len = desc_len;
- acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST);
- acc->rqst.desc_len =
- fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst));
- acc->rqst.w0.ls_cmd = rqst_ls_cmd;
-}
+/* ********************** FC-NVME LS RCV Handling ************************* */
-static int
-nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd,
- u8 reason, u8 explanation, u8 vendor)
-{
- struct fcnvme_ls_rjt *rjt = buf;
-
- nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST,
- fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)),
- ls_cmd);
- rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT);
- rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt));
- rjt->rjt.reason_code = reason;
- rjt->rjt.reason_explanation = explanation;
- rjt->rjt.vendor = vendor;
-
- return sizeof(struct fcnvme_ls_rjt);
-}
-
-/* Validation Error indexes into the string table below */
-enum {
- VERR_NO_ERROR = 0,
- VERR_CR_ASSOC_LEN = 1,
- VERR_CR_ASSOC_RQST_LEN = 2,
- VERR_CR_ASSOC_CMD = 3,
- VERR_CR_ASSOC_CMD_LEN = 4,
- VERR_ERSP_RATIO = 5,
- VERR_ASSOC_ALLOC_FAIL = 6,
- VERR_QUEUE_ALLOC_FAIL = 7,
- VERR_CR_CONN_LEN = 8,
- VERR_CR_CONN_RQST_LEN = 9,
- VERR_ASSOC_ID = 10,
- VERR_ASSOC_ID_LEN = 11,
- VERR_NO_ASSOC = 12,
- VERR_CONN_ID = 13,
- VERR_CONN_ID_LEN = 14,
- VERR_NO_CONN = 15,
- VERR_CR_CONN_CMD = 16,
- VERR_CR_CONN_CMD_LEN = 17,
- VERR_DISCONN_LEN = 18,
- VERR_DISCONN_RQST_LEN = 19,
- VERR_DISCONN_CMD = 20,
- VERR_DISCONN_CMD_LEN = 21,
- VERR_DISCONN_SCOPE = 22,
- VERR_RS_LEN = 23,
- VERR_RS_RQST_LEN = 24,
- VERR_RS_CMD = 25,
- VERR_RS_CMD_LEN = 26,
- VERR_RS_RCTL = 27,
- VERR_RS_RO = 28,
-};
-
-static char *validation_errors[] = {
- "OK",
- "Bad CR_ASSOC Length",
- "Bad CR_ASSOC Rqst Length",
- "Not CR_ASSOC Cmd",
- "Bad CR_ASSOC Cmd Length",
- "Bad Ersp Ratio",
- "Association Allocation Failed",
- "Queue Allocation Failed",
- "Bad CR_CONN Length",
- "Bad CR_CONN Rqst Length",
- "Not Association ID",
- "Bad Association ID Length",
- "No Association",
- "Not Connection ID",
- "Bad Connection ID Length",
- "No Connection",
- "Not CR_CONN Cmd",
- "Bad CR_CONN Cmd Length",
- "Bad DISCONN Length",
- "Bad DISCONN Rqst Length",
- "Not DISCONN Cmd",
- "Bad DISCONN Cmd Length",
- "Bad Disconnect Scope",
- "Bad RS Length",
- "Bad RS Rqst Length",
- "Not RS Cmd",
- "Bad RS Cmd Length",
- "Bad RS R_CTL",
- "Bad RS Relative Offset",
-};
static void
nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_assoc_rqst *rqst =
- (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_assoc_acc *acc =
- (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc;
+ struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1356,7 +1688,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
else {
/* new association w/ admin queue */
- iod->assoc = nvmet_fc_alloc_target_assoc(tgtport);
+ iod->assoc = nvmet_fc_alloc_target_assoc(
+ tgtport, iod->hosthandle);
if (!iod->assoc)
ret = VERR_ASSOC_ALLOC_FAIL;
else {
@@ -1371,8 +1704,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
dev_err(tgtport->dev,
"Create Association LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
return;
@@ -1382,11 +1715,15 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport,
atomic_set(&queue->connected, 1);
queue->sqhd = 0; /* best place to init value */
+ dev_info(tgtport->dev,
+ "{%d:%d} Association created\n",
+ tgtport->fc_target_port.port_num, iod->assoc->a_id);
+
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_cr_assoc_acc)),
FCNVME_LS_CREATE_ASSOCIATION);
@@ -1407,10 +1744,8 @@ static void
nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_cr_conn_rqst *rqst =
- (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf;
- struct fcnvme_ls_cr_conn_acc *acc =
- (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf;
+ struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn;
+ struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn;
struct nvmet_fc_tgt_queue *queue;
int ret = 0;
@@ -1462,8 +1797,8 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
dev_err(tgtport->dev,
"Create Connection LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
FCNVME_RJT_RC_LOGIC,
@@ -1477,9 +1812,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)),
FCNVME_LS_CREATE_CONNECTION);
acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID);
@@ -1491,46 +1826,28 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport,
be16_to_cpu(rqst->connect_cmd.qid)));
}
-static void
+/*
+ * Returns true if the LS response is to be transmit
+ * Returns false if the LS response is to be delayed
+ */
+static int
nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
struct fcnvme_ls_disconnect_assoc_rqst *rqst =
- (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf;
+ &iod->rqstbuf->rq_dis_assoc;
struct fcnvme_ls_disconnect_assoc_acc *acc =
- (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf;
- struct nvmet_fc_tgt_assoc *assoc;
+ &iod->rspbuf->rsp_dis_assoc;
+ struct nvmet_fc_tgt_assoc *assoc = NULL;
+ struct nvmet_fc_ls_iod *oldls = NULL;
+ unsigned long flags;
int ret = 0;
memset(acc, 0, sizeof(*acc));
- if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst))
- ret = VERR_DISCONN_LEN;
- else if (rqst->desc_list_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_ls_disconnect_assoc_rqst)))
- ret = VERR_DISCONN_RQST_LEN;
- else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
- ret = VERR_ASSOC_ID;
- else if (rqst->associd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_assoc_id)))
- ret = VERR_ASSOC_ID_LEN;
- else if (rqst->discon_cmd.desc_tag !=
- cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD))
- ret = VERR_DISCONN_CMD;
- else if (rqst->discon_cmd.desc_len !=
- fcnvme_lsdesc_len(
- sizeof(struct fcnvme_lsdesc_disconn_cmd)))
- ret = VERR_DISCONN_CMD_LEN;
- /*
- * As the standard changed on the LS, check if old format and scope
- * something other than Association (e.g. 0).
- */
- else if (rqst->discon_cmd.rsvd8[0])
- ret = VERR_DISCONN_SCOPE;
- else {
- /* match an active association */
+ ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst);
+ if (!ret) {
+ /* match an active association - takes an assoc ref if !NULL */
assoc = nvmet_fc_find_target_assoc(tgtport,
be64_to_cpu(rqst->associd.association_id));
iod->assoc = assoc;
@@ -1538,34 +1855,63 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport,
ret = VERR_NO_ASSOC;
}
- if (ret) {
+ if (ret || !assoc) {
dev_err(tgtport->dev,
"Disconnect LS failed: %s\n",
validation_errors[ret]);
- iod->lsreq->rsplen = nvmet_fc_format_rjt(acc,
- NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(acc,
+ sizeof(*acc), rqst->w0.ls_cmd,
(ret == VERR_NO_ASSOC) ?
FCNVME_RJT_RC_INV_ASSOC :
- (ret == VERR_NO_CONN) ?
- FCNVME_RJT_RC_INV_CONN :
- FCNVME_RJT_RC_LOGIC,
+ FCNVME_RJT_RC_LOGIC,
FCNVME_RJT_EXP_NONE, 0);
- return;
+ return true;
}
/* format a response */
- iod->lsreq->rsplen = sizeof(*acc);
+ iod->lsrsp->rsplen = sizeof(*acc);
- nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
+ nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC,
fcnvme_lsdesc_len(
sizeof(struct fcnvme_ls_disconnect_assoc_acc)),
FCNVME_LS_DISCONNECT_ASSOC);
/* release get taken in nvmet_fc_find_target_assoc */
- nvmet_fc_tgt_a_put(iod->assoc);
+ nvmet_fc_tgt_a_put(assoc);
+
+ /*
+ * The rules for LS response says the response cannot
+ * go back until ABTS's have been sent for all outstanding
+ * I/O and a Disconnect Association LS has been sent.
+ * So... save off the Disconnect LS to send the response
+ * later. If there was a prior LS already saved, replace
+ * it with the newer one and send a can't perform reject
+ * on the older one.
+ */
+ spin_lock_irqsave(&tgtport->lock, flags);
+ oldls = assoc->rcv_disconn;
+ assoc->rcv_disconn = iod;
+ spin_unlock_irqrestore(&tgtport->lock, flags);
+
+ nvmet_fc_delete_target_assoc(assoc);
- nvmet_fc_delete_target_assoc(iod->assoc);
+ if (oldls) {
+ dev_info(tgtport->dev,
+ "{%d:%d} Multiple Disconnect Association LS's "
+ "received\n",
+ tgtport->fc_target_port.port_num, assoc->a_id);
+ /* overwrite good response with bogus failure */
+ oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf,
+ sizeof(*iod->rspbuf),
+ /* ok to use rqst, LS is same */
+ rqst->w0.ls_cmd,
+ FCNVME_RJT_RC_UNAB,
+ FCNVME_RJT_EXP_NONE, 0);
+ nvmet_fc_xmt_ls_rsp(tgtport, oldls);
+ }
+
+ return false;
}
@@ -1577,13 +1923,13 @@ static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req);
static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops;
static void
-nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq)
+nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp)
{
- struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private;
+ struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private;
struct nvmet_fc_tgtport *tgtport = iod->tgtport;
fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
nvmet_fc_free_ls_iod(tgtport, iod);
nvmet_fc_tgtport_put(tgtport);
}
@@ -1595,11 +1941,11 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport,
int ret;
fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma,
- NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE);
+ sizeof(*iod->rspbuf), DMA_TO_DEVICE);
- ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq);
+ ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp);
if (ret)
- nvmet_fc_xmt_ls_rsp_done(iod->lsreq);
+ nvmet_fc_xmt_ls_rsp_done(iod->lsrsp);
}
/*
@@ -1609,15 +1955,15 @@ static void
nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
struct nvmet_fc_ls_iod *iod)
{
- struct fcnvme_ls_rqst_w0 *w0 =
- (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf;
+ struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0;
+ bool sendrsp = true;
- iod->lsreq->nvmet_fc_private = iod;
- iod->lsreq->rspbuf = iod->rspbuf;
- iod->lsreq->rspdma = iod->rspdma;
- iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done;
+ iod->lsrsp->nvme_fc_private = iod;
+ iod->lsrsp->rspbuf = iod->rspbuf;
+ iod->lsrsp->rspdma = iod->rspdma;
+ iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done;
/* Be preventative. handlers will later set to valid length */
- iod->lsreq->rsplen = 0;
+ iod->lsrsp->rsplen = 0;
iod->assoc = NULL;
@@ -1637,15 +1983,16 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport,
break;
case FCNVME_LS_DISCONNECT_ASSOC:
/* Terminate a Queue/Connection or the Association */
- nvmet_fc_ls_disconnect(tgtport, iod);
+ sendrsp = nvmet_fc_ls_disconnect(tgtport, iod);
break;
default:
- iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf,
- NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd,
+ iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf,
+ sizeof(*iod->rspbuf), w0->ls_cmd,
FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0);
}
- nvmet_fc_xmt_ls_rsp(tgtport, iod);
+ if (sendrsp)
+ nvmet_fc_xmt_ls_rsp(tgtport, iod);
}
/*
@@ -1674,35 +2021,53 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work)
*
* @target_port: pointer to the (registered) target port the LS was
* received on.
- * @lsreq: pointer to a lsreq request structure to be used to reference
+ * @lsrsp: pointer to a lsrsp structure to be used to reference
* the exchange corresponding to the LS.
* @lsreqbuf: pointer to the buffer containing the LS Request
* @lsreqbuf_len: length, in bytes, of the received LS request
*/
int
nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port,
- struct nvmefc_tgt_ls_req *lsreq,
+ void *hosthandle,
+ struct nvmefc_ls_rsp *lsrsp,
void *lsreqbuf, u32 lsreqbuf_len)
{
struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port);
struct nvmet_fc_ls_iod *iod;
-
- if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE)
+ struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf;
+
+ if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: payload too large (%d)\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "",
+ lsreqbuf_len);
return -E2BIG;
+ }
- if (!nvmet_fc_tgtport_get(tgtport))
+ if (!nvmet_fc_tgtport_get(tgtport)) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: target deleting\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
return -ESHUTDOWN;
+ }
iod = nvmet_fc_alloc_ls_iod(tgtport);
if (!iod) {
+ dev_info(tgtport->dev,
+ "RCV %s LS failed: context allocation failed\n",
+ (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ?
+ nvmefc_ls_names[w0->ls_cmd] : "");
nvmet_fc_tgtport_put(tgtport);
return -ENOENT;
}
- iod->lsreq = lsreq;
+ iod->lsrsp = lsrsp;
iod->fcpreq = NULL;
memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len);
iod->rqstdatalen = lsreqbuf_len;
+ iod->hosthandle = hosthandle;
schedule_work(&iod->work);
diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c
index f69ce66e2d44..2ff1d1334a03 100644
--- a/drivers/nvme/target/fcloop.c
+++ b/drivers/nvme/target/fcloop.c
@@ -208,10 +208,13 @@ struct fcloop_rport {
};
struct fcloop_tport {
- struct nvmet_fc_target_port *targetport;
- struct nvme_fc_remote_port *remoteport;
- struct fcloop_nport *nport;
- struct fcloop_lport *lport;
+ struct nvmet_fc_target_port *targetport;
+ struct nvme_fc_remote_port *remoteport;
+ struct fcloop_nport *nport;
+ struct fcloop_lport *lport;
+ spinlock_t lock;
+ struct list_head ls_list;
+ struct work_struct ls_work;
};
struct fcloop_nport {
@@ -228,7 +231,8 @@ struct fcloop_nport {
struct fcloop_lsreq {
struct nvmefc_ls_req *lsreq;
- struct nvmefc_tgt_ls_req tgt_ls_req;
+ struct nvmefc_ls_rsp ls_rsp;
+ int lsdir; /* H2T or T2H */
int status;
struct list_head ls_list; /* fcloop_rport->ls_list */
};
@@ -267,9 +271,9 @@ struct fcloop_ini_fcpreq {
};
static inline struct fcloop_lsreq *
-tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq)
+ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp)
{
- return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req);
+ return container_of(lsrsp, struct fcloop_lsreq, ls_rsp);
}
static inline struct fcloop_fcpreq *
@@ -323,7 +327,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work)
}
static int
-fcloop_ls_req(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_req(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
@@ -344,27 +348,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport,
}
tls_req->status = 0;
- ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req,
- lsreq->rqstaddr, lsreq->rqstlen);
+ ret = nvmet_fc_rcv_ls_req(rport->targetport, rport,
+ &tls_req->ls_rsp,
+ lsreq->rqstaddr, lsreq->rqstlen);
return ret;
}
static int
-fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
- struct nvmefc_tgt_ls_req *tgt_lsreq)
+fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
+ struct nvmefc_ls_rsp *lsrsp)
{
- struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq);
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
struct nvmefc_ls_req *lsreq = tls_req->lsreq;
struct fcloop_tport *tport = targetport->private;
struct nvme_fc_remote_port *remoteport = tport->remoteport;
struct fcloop_rport *rport;
- memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf,
- ((lsreq->rsplen < tgt_lsreq->rsplen) ?
- lsreq->rsplen : tgt_lsreq->rsplen));
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf,
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
- tgt_lsreq->done(tgt_lsreq);
+ lsrsp->done(lsrsp);
if (remoteport) {
rport = remoteport->private;
@@ -377,6 +382,99 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport,
return 0;
}
+static void
+fcloop_tport_lsrqst_work(struct work_struct *work)
+{
+ struct fcloop_tport *tport =
+ container_of(work, struct fcloop_tport, ls_work);
+ struct fcloop_lsreq *tls_req;
+
+ spin_lock(&tport->lock);
+ for (;;) {
+ tls_req = list_first_entry_or_null(&tport->ls_list,
+ struct fcloop_lsreq, ls_list);
+ if (!tls_req)
+ break;
+
+ list_del(&tls_req->ls_list);
+ spin_unlock(&tport->lock);
+
+ tls_req->lsreq->done(tls_req->lsreq, tls_req->status);
+ /*
+ * callee may free memory containing tls_req.
+ * do not reference lsreq after this.
+ */
+
+ spin_lock(&tport->lock);
+ }
+ spin_unlock(&tport->lock);
+}
+
+static int
+fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle,
+ struct nvmefc_ls_req *lsreq)
+{
+ struct fcloop_lsreq *tls_req = lsreq->private;
+ struct fcloop_tport *tport = targetport->private;
+ int ret = 0;
+
+ /*
+ * hosthandle should be the dst.rport value.
+ * hosthandle ignored as fcloop currently is
+ * 1:1 tgtport vs remoteport
+ */
+ tls_req->lsreq = lsreq;
+ INIT_LIST_HEAD(&tls_req->ls_list);
+
+ if (!tport->remoteport) {
+ tls_req->status = -ECONNREFUSED;
+ spin_lock(&tport->lock);
+ list_add_tail(&tport->ls_list, &tls_req->ls_list);
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work);
+ return ret;
+ }
+
+ tls_req->status = 0;
+ ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp,
+ lsreq->rqstaddr, lsreq->rqstlen);
+
+ return ret;
+}
+
+static int
+fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *lsrsp)
+{
+ struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp);
+ struct nvmefc_ls_req *lsreq = tls_req->lsreq;
+ struct fcloop_rport *rport = remoteport->private;
+ struct nvmet_fc_target_port *targetport = rport->targetport;
+ struct fcloop_tport *tport;
+
+ memcpy(lsreq->rspaddr, lsrsp->rspbuf,
+ ((lsreq->rsplen < lsrsp->rsplen) ?
+ lsreq->rsplen : lsrsp->rsplen));
+ lsrsp->done(lsrsp);
+
+ if (targetport) {
+ tport = targetport->private;
+ spin_lock(&tport->lock);
+ list_add_tail(&tport->ls_list, &tls_req->ls_list);
+ spin_unlock(&tport->lock);
+ schedule_work(&tport->ls_work);
+ }
+
+ return 0;
+}
+
+static void
+fcloop_t2h_host_release(void *hosthandle)
+{
+ /* host handle ignored for now */
+}
+
/*
* Simulate reception of RSCN and converting it to a initiator transport
* call to rescan a remote port.
@@ -762,13 +860,19 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport,
}
static void
-fcloop_ls_abort(struct nvme_fc_local_port *localport,
+fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
struct nvmefc_ls_req *lsreq)
{
}
static void
+fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport,
+ void *hosthandle, struct nvmefc_ls_req *lsreq)
+{
+}
+
+static void
fcloop_fcp_abort(struct nvme_fc_local_port *localport,
struct nvme_fc_remote_port *remoteport,
void *hw_queue_handle,
@@ -867,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport)
{
struct fcloop_tport *tport = targetport->private;
+ flush_work(&tport->ls_work);
fcloop_nport_put(tport->nport);
}
@@ -879,10 +984,11 @@ static struct nvme_fc_port_template fctemplate = {
.remoteport_delete = fcloop_remoteport_delete,
.create_queue = fcloop_create_queue,
.delete_queue = fcloop_delete_queue,
- .ls_req = fcloop_ls_req,
+ .ls_req = fcloop_h2t_ls_req,
.fcp_io = fcloop_fcp_req,
- .ls_abort = fcloop_ls_abort,
+ .ls_abort = fcloop_h2t_ls_abort,
.fcp_abort = fcloop_fcp_abort,
+ .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -896,11 +1002,14 @@ static struct nvme_fc_port_template fctemplate = {
static struct nvmet_fc_target_template tgttemplate = {
.targetport_delete = fcloop_targetport_delete,
- .xmt_ls_rsp = fcloop_xmt_ls_rsp,
+ .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp,
.fcp_op = fcloop_fcp_op,
.fcp_abort = fcloop_tgt_fcp_abort,
.fcp_req_release = fcloop_fcp_req_release,
.discovery_event = fcloop_tgt_discovery_evt,
+ .ls_req = fcloop_t2h_ls_req,
+ .ls_abort = fcloop_t2h_ls_abort,
+ .host_release = fcloop_t2h_host_release,
.max_hw_queues = FCLOOP_HW_QUEUES,
.max_sgl_segments = FCLOOP_SGL_SEGS,
.max_dif_sgl_segments = FCLOOP_SGL_SEGS,
@@ -909,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct fcloop_tport),
+ .lsrqst_priv_sz = sizeof(struct fcloop_lsreq),
};
static ssize_t
@@ -1258,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr,
tport->nport = nport;
tport->lport = nport->lport;
nport->tport = tport;
+ spin_lock_init(&tport->lock);
+ INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work);
+ INIT_LIST_HEAD(&tport->ls_list);
return count;
}
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index 26f50c23b82e..3dd6f566a240 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->nows = to0based(ql->io_opt / ql->logical_block_size);
}
+static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns)
+{
+ struct blk_integrity *bi = bdev_get_integrity(ns->bdev);
+
+ if (bi) {
+ ns->metadata_size = bi->tuple_size;
+ if (bi->profile == &t10_pi_type1_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE1;
+ else if (bi->profile == &t10_pi_type3_crc)
+ ns->pi_type = NVME_NS_DPS_PI_TYPE3;
+ else
+ /* Unsupported metadata type */
+ ns->metadata_size = 0;
+ }
+}
+
int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
{
int ret;
@@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns)
}
ns->size = i_size_read(ns->bdev->bd_inode);
ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
+
+ ns->pi_type = 0;
+ ns->metadata_size = 0;
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10))
+ nvmet_bdev_ns_enable_integrity(ns);
+
return 0;
}
@@ -75,6 +97,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
}
}
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns)
+{
+ ns->size = i_size_read(ns->bdev->bd_inode);
+}
+
static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts)
{
u16 status = NVME_SC_SUCCESS;
@@ -142,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio)
bio_put(bio);
}
+#ifdef CONFIG_BLK_DEV_INTEGRITY
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ struct blk_integrity *bi;
+ struct bio_integrity_payload *bip;
+ struct block_device *bdev = req->ns->bdev;
+ int rc;
+ size_t resid, len;
+
+ bi = bdev_get_integrity(bdev);
+ if (unlikely(!bi)) {
+ pr_err("Unable to locate bio_integrity\n");
+ return -ENODEV;
+ }
+
+ bip = bio_integrity_alloc(bio, GFP_NOIO,
+ min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+ if (IS_ERR(bip)) {
+ pr_err("Unable to allocate bio_integrity_payload\n");
+ return PTR_ERR(bip);
+ }
+
+ bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio));
+ /* virtual start sector must be in integrity interval units */
+ bip_set_seed(bip, bio->bi_iter.bi_sector >>
+ (bi->interval_exp - SECTOR_SHIFT));
+
+ resid = bip->bip_iter.bi_size;
+ while (resid > 0 && sg_miter_next(miter)) {
+ len = min_t(size_t, miter->length, resid);
+ rc = bio_integrity_add_page(bio, miter->page, len,
+ offset_in_page(miter->addr));
+ if (unlikely(rc != len)) {
+ pr_err("bio_integrity_add_page() failed; %d\n", rc);
+ sg_miter_stop(miter);
+ return -ENOMEM;
+ }
+
+ resid -= len;
+ if (len < miter->length)
+ miter->consumed -= miter->length - len;
+ }
+ sg_miter_stop(miter);
+
+ return 0;
+}
+#else
+static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
+ struct sg_mapping_iter *miter)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
static void nvmet_bdev_execute_rw(struct nvmet_req *req)
{
int sg_cnt = req->sg_cnt;
@@ -149,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
struct scatterlist *sg;
struct blk_plug plug;
sector_t sector;
- int op, i;
+ int op, i, rc;
+ struct sg_mapping_iter prot_miter;
+ unsigned int iter_flags;
+ unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, total_len))
return;
if (!req->sg_cnt) {
@@ -163,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA))
op |= REQ_FUA;
+ iter_flags = SG_MITER_TO_SG;
} else {
op = REQ_OP_READ;
+ iter_flags = SG_MITER_FROM_SG;
}
if (is_pci_p2pdma_page(sg_page(req->sg)))
@@ -186,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
bio->bi_opf = op;
blk_start_plug(&plug);
+ if (req->metadata_len)
+ sg_miter_start(&prot_miter, req->metadata_sg,
+ req->metadata_sg_cnt, iter_flags);
+
for_each_sg(req->sg, sg, req->sg_cnt, i) {
while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset)
!= sg->length) {
struct bio *prev = bio;
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio,
+ &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
bio_set_dev(bio, req->ns->bdev);
bio->bi_iter.bi_sector = sector;
@@ -204,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
sg_cnt--;
}
+ if (req->metadata_len) {
+ rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter);
+ if (unlikely(rc)) {
+ bio_io_error(bio);
+ return;
+ }
+ }
+
submit_bio(bio);
blk_finish_plug(&plug);
}
@@ -212,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
{
struct bio *bio = &req->b.inline_bio;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
@@ -304,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
sector_t nr_sector;
int ret;
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
sector = le64_to_cpu(write_zeroes->slba) <<
@@ -331,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
case nvme_cmd_read:
case nvme_cmd_write:
req->execute = nvmet_bdev_execute_rw;
+ if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns))
+ req->metadata_len = nvmet_rw_metadata_len(req);
return 0;
case nvme_cmd_flush:
req->execute = nvmet_bdev_execute_flush;
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index cd5670b83118..0abbefd9925e 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -13,6 +13,18 @@
#define NVMET_MAX_MPOOL_BVEC 16
#define NVMET_MIN_MPOOL_OBJ 16
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns)
+{
+ struct kstat stat;
+ int ret;
+
+ ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE,
+ AT_STATX_FORCE_SYNC);
+ if (!ret)
+ ns->size = stat.size;
+ return ret;
+}
+
void nvmet_file_ns_disable(struct nvmet_ns *ns)
{
if (ns->file) {
@@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns)
int nvmet_file_ns_enable(struct nvmet_ns *ns)
{
int flags = O_RDWR | O_LARGEFILE;
- struct kstat stat;
int ret;
if (!ns->buffered_io)
@@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
return PTR_ERR(ns->file);
}
- ret = vfs_getattr(&ns->file->f_path,
- &stat, STATX_SIZE, AT_STATX_FORCE_SYNC);
+ ret = nvmet_file_ns_revalidate(ns);
if (ret)
goto err;
- ns->size = stat.size;
/*
* i_blkbits can be greater than the universally accepted upper bound,
* so make sure we export a sane namespace lba_shift.
@@ -232,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req)
{
ssize_t nr_bvec = req->sg_cnt;
- if (!nvmet_check_data_len(req, nvmet_rw_len(req)))
+ if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req)))
return;
if (!req->sg_cnt || !nr_bvec) {
@@ -276,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w)
static void nvmet_file_execute_flush(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_flush_work);
schedule_work(&req->f.work);
@@ -366,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w)
static void nvmet_file_execute_write_zeroes(struct nvmet_req *req)
{
- if (!nvmet_check_data_len(req, 0))
+ if (!nvmet_check_transfer_len(req, 0))
return;
INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work);
schedule_work(&req->f.work);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 421dff3ea143..809691291e73 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -19,6 +19,7 @@
#include <linux/rcupdate.h>
#include <linux/blkdev.h>
#include <linux/radix-tree.h>
+#include <linux/t10-pi.h>
#define NVMET_ASYNC_EVENTS 4
#define NVMET_ERROR_LOG_SLOTS 128
@@ -77,6 +78,8 @@ struct nvmet_ns {
int use_p2pmem;
struct pci_dev *p2p_dev;
+ int pi_type;
+ int metadata_size;
};
static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item)
@@ -142,6 +145,7 @@ struct nvmet_port {
bool enabled;
int inline_data_size;
const struct nvmet_fabrics_ops *tr_ops;
+ bool pi_enable;
};
static inline struct nvmet_port *to_nvmet_port(struct config_item *item)
@@ -201,6 +205,7 @@ struct nvmet_ctrl {
spinlock_t error_lock;
u64 err_counter;
struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS];
+ bool pi_support;
};
struct nvmet_subsys_model {
@@ -230,6 +235,7 @@ struct nvmet_subsys {
u64 ver;
u64 serial;
char *subsysnqn;
+ bool pi_support;
struct config_group group;
@@ -281,6 +287,7 @@ struct nvmet_fabrics_ops {
unsigned int type;
unsigned int msdbd;
bool has_keyed_sgls : 1;
+ bool metadata_support : 1;
void (*queue_response)(struct nvmet_req *req);
int (*add_port)(struct nvmet_port *port);
void (*remove_port)(struct nvmet_port *port);
@@ -302,6 +309,7 @@ struct nvmet_req {
struct nvmet_cq *cq;
struct nvmet_ns *ns;
struct scatterlist *sg;
+ struct scatterlist *metadata_sg;
struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC];
union {
struct {
@@ -315,8 +323,10 @@ struct nvmet_req {
} f;
};
int sg_cnt;
+ int metadata_sg_cnt;
/* data length as parsed from the SGL descriptor: */
size_t transfer_len;
+ size_t metadata_len;
struct nvmet_port *port;
@@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req);
bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops);
void nvmet_req_uninit(struct nvmet_req *req);
-bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len);
+bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len);
bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len);
void nvmet_req_complete(struct nvmet_req *req, u16 status);
-int nvmet_req_alloc_sgl(struct nvmet_req *req);
-void nvmet_req_free_sgl(struct nvmet_req *req);
+int nvmet_req_alloc_sgls(struct nvmet_req *req);
+void nvmet_req_free_sgls(struct nvmet_req *req);
void nvmet_execute_keep_alive(struct nvmet_req *req);
@@ -498,13 +508,24 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns);
u16 nvmet_bdev_flush(struct nvmet_req *req);
u16 nvmet_file_flush(struct nvmet_req *req);
void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
+void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns);
+int nvmet_file_ns_revalidate(struct nvmet_ns *ns);
+void nvmet_ns_revalidate(struct nvmet_ns *ns);
-static inline u32 nvmet_rw_len(struct nvmet_req *req)
+static inline u32 nvmet_rw_data_len(struct nvmet_req *req)
{
return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) <<
req->ns->blksize_shift;
}
+static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return 0;
+ return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) *
+ req->ns->metadata_size;
+}
+
static inline u32 nvmet_dsm_len(struct nvmet_req *req)
{
return (le32_to_cpu(req->cmd->dsm.nr) + 1) *
@@ -519,4 +540,11 @@ static inline __le16 to0based(u32 a)
return cpu_to_le16(max(1U, min(1U << 16, a)) - 1);
}
+static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns)
+{
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return false;
+ return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple);
+}
+
#endif /* _NVMET_H */
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index fd47de0e4e4e..d5141780592e 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -33,6 +33,9 @@
/* Assume mpsmin == device_page_size == 4KB */
#define NVMET_RDMA_MAX_MDTS 8
+#define NVMET_RDMA_MAX_METADATA_MDTS 5
+
+struct nvmet_rdma_srq;
struct nvmet_rdma_cmd {
struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
@@ -41,6 +44,7 @@ struct nvmet_rdma_cmd {
struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
struct nvme_command *nvme_cmd;
struct nvmet_rdma_queue *queue;
+ struct nvmet_rdma_srq *nsrq;
};
enum {
@@ -57,6 +61,7 @@ struct nvmet_rdma_rsp {
struct nvmet_rdma_queue *queue;
struct ib_cqe read_cqe;
+ struct ib_cqe write_cqe;
struct rdma_rw_ctx rw;
struct nvmet_req req;
@@ -83,6 +88,7 @@ struct nvmet_rdma_queue {
struct ib_cq *cq;
atomic_t sq_wr_avail;
struct nvmet_rdma_device *dev;
+ struct nvmet_rdma_srq *nsrq;
spinlock_t state_lock;
enum nvmet_rdma_queue_state state;
struct nvmet_cq nvme_cq;
@@ -100,6 +106,7 @@ struct nvmet_rdma_queue {
int idx;
int host_qid;
+ int comp_vector;
int recv_queue_size;
int send_queue_size;
@@ -113,11 +120,17 @@ struct nvmet_rdma_port {
struct delayed_work repair_work;
};
+struct nvmet_rdma_srq {
+ struct ib_srq *srq;
+ struct nvmet_rdma_cmd *cmds;
+ struct nvmet_rdma_device *ndev;
+};
+
struct nvmet_rdma_device {
struct ib_device *device;
struct ib_pd *pd;
- struct ib_srq *srq;
- struct nvmet_rdma_cmd *srq_cmds;
+ struct nvmet_rdma_srq **srqs;
+ int srq_count;
size_t srq_size;
struct kref ref;
struct list_head entry;
@@ -129,6 +142,16 @@ static bool nvmet_rdma_use_srq;
module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
+static int srq_size_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops srq_size_ops = {
+ .set = srq_size_set,
+ .get = param_get_int,
+};
+
+static int nvmet_rdma_srq_size = 1024;
+module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
+MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");
+
static DEFINE_IDA(nvmet_rdma_queue_ida);
static LIST_HEAD(nvmet_rdma_queue_list);
static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
@@ -140,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
@@ -149,6 +173,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
static const struct nvmet_fabrics_ops nvmet_rdma_ops;
+static int srq_size_set(const char *val, const struct kernel_param *kp)
+{
+ int n = 0, ret;
+
+ ret = kstrtoint(val, 10, &n);
+ if (ret != 0 || n < 256)
+ return -EINVAL;
+
+ return param_set_int(val, kp);
+}
+
static int num_pages(int len)
{
return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
@@ -391,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
/* Data In / RDMA READ */
r->read_cqe.done = nvmet_rdma_read_data_done;
+ /* Data Out / RDMA WRITE */
+ r->write_cqe.done = nvmet_rdma_write_data_done;
+
return 0;
out_free_rsp:
@@ -466,8 +504,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev,
cmd->sge[0].addr, cmd->sge[0].length,
DMA_FROM_DEVICE);
- if (ndev->srq)
- ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
+ if (cmd->nsrq)
+ ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
else
ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
@@ -500,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue)
spin_unlock(&queue->rsp_wr_wait_lock);
}
+static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
+{
+ struct ib_mr_status mr_status;
+ int ret;
+ u16 status = 0;
+
+ ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
+ if (ret) {
+ pr_err("ib_check_mr_status failed, ret %d\n", ret);
+ return NVME_SC_INVALID_PI;
+ }
+
+ if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
+ switch (mr_status.sig_err.err_type) {
+ case IB_SIG_BAD_GUARD:
+ status = NVME_SC_GUARD_CHECK;
+ break;
+ case IB_SIG_BAD_REFTAG:
+ status = NVME_SC_REFTAG_CHECK;
+ break;
+ case IB_SIG_BAD_APPTAG:
+ status = NVME_SC_APPTAG_CHECK;
+ break;
+ }
+ pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
+ mr_status.sig_err.err_type,
+ mr_status.sig_err.expected,
+ mr_status.sig_err.actual);
+ }
+
+ return status;
+}
+
+static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
+ struct nvme_command *cmd, struct ib_sig_domain *domain,
+ u16 control, u8 pi_type)
+{
+ domain->sig_type = IB_SIG_TYPE_T10_DIF;
+ domain->sig.dif.bg_type = IB_T10DIF_CRC;
+ domain->sig.dif.pi_interval = 1 << bi->interval_exp;
+ domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ domain->sig.dif.ref_remap = true;
+
+ domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
+ domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
+ domain->sig.dif.app_escape = true;
+ if (pi_type == NVME_NS_DPS_PI_TYPE3)
+ domain->sig.dif.ref_escape = true;
+}
+
+static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct nvme_command *cmd = req->cmd;
+ u16 control = le16_to_cpu(cmd->rw.control);
+ u8 pi_type = req->ns->pi_type;
+ struct blk_integrity *bi;
+
+ bi = bdev_get_integrity(req->ns->bdev);
+
+ memset(sig_attrs, 0, sizeof(*sig_attrs));
+
+ if (control & NVME_RW_PRINFO_PRACT) {
+ /* for WRITE_INSERT/READ_STRIP no wire domain */
+ sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ /* Clear the PRACT bit since HCA will generate/verify the PI */
+ control &= ~NVME_RW_PRINFO_PRACT;
+ cmd->rw.control = cpu_to_le16(control);
+ /* PI is added by the HW */
+ req->transfer_len += req->metadata_len;
+ } else {
+ /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
+ pi_type);
+ nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
+ pi_type);
+ }
+
+ if (control & NVME_RW_PRINFO_PRCHK_REF)
+ sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
+ if (control & NVME_RW_PRINFO_PRCHK_GUARD)
+ sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
+ if (control & NVME_RW_PRINFO_PRCHK_APP)
+ sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
+}
+
+static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
+ struct ib_sig_attrs *sig_attrs)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+ int ret;
+
+ if (req->metadata_len)
+ ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
+ addr, key, nvmet_data_dir(req));
+ else
+ ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, 0, addr, key,
+ nvmet_data_dir(req));
+
+ return ret;
+}
+
+static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
+{
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ struct nvmet_req *req = &rsp->req;
+
+ if (req->metadata_len)
+ rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
+ cm_id->port_num, req->sg, req->sg_cnt,
+ req->metadata_sg, req->metadata_sg_cnt,
+ nvmet_data_dir(req));
+ else
+ rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
+ req->sg, req->sg_cnt, nvmet_data_dir(req));
+}
static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
{
@@ -507,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
- if (rsp->n_rdma) {
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
- }
+ if (rsp->n_rdma)
+ nvmet_rdma_rw_ctx_destroy(rsp);
if (rsp->req.sg != rsp->cmd->inline_sg)
- nvmet_req_free_sgl(&rsp->req);
+ nvmet_req_free_sgls(&rsp->req);
if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
nvmet_rdma_process_wr_wait_list(queue);
@@ -566,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req)
rsp->send_wr.opcode = IB_WR_SEND;
}
- if (nvmet_rdma_need_data_out(rsp))
- first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
- cm_id->port_num, NULL, &rsp->send_wr);
- else
+ if (nvmet_rdma_need_data_out(rsp)) {
+ if (rsp->req.metadata_len)
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, &rsp->write_cqe, NULL);
+ else
+ first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
+ cm_id->port_num, NULL, &rsp->send_wr);
+ } else {
first_wr = &rsp->send_wr;
+ }
nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
@@ -589,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
struct nvmet_rdma_rsp *rsp =
container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
struct nvmet_rdma_queue *queue = cq->cq_context;
+ u16 status = 0;
WARN_ON(rsp->n_rdma <= 0);
atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
- rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
- queue->cm_id->port_num, rsp->req.sg,
- rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
rsp->n_rdma = 0;
if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
nvmet_req_uninit(&rsp->req);
nvmet_rdma_release_rsp(rsp);
if (wc->status != IB_WC_WR_FLUSH_ERR) {
@@ -608,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
return;
}
- rsp->req.execute(&rsp->req);
+ if (rsp->req.metadata_len)
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(status))
+ nvmet_req_complete(&rsp->req, status);
+ else
+ rsp->req.execute(&rsp->req);
+}
+
+static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct nvmet_rdma_rsp *rsp =
+ container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
+ struct nvmet_rdma_queue *queue = cq->cq_context;
+ struct rdma_cm_id *cm_id = rsp->queue->cm_id;
+ u16 status;
+
+ if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
+ return;
+
+ WARN_ON(rsp->n_rdma <= 0);
+ atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
+ rsp->n_rdma = 0;
+
+ if (unlikely(wc->status != IB_WC_SUCCESS)) {
+ nvmet_rdma_rw_ctx_destroy(rsp);
+ nvmet_req_uninit(&rsp->req);
+ nvmet_rdma_release_rsp(rsp);
+ if (wc->status != IB_WC_WR_FLUSH_ERR) {
+ pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
+ wc->wr_cqe, ib_wc_status_msg(wc->status),
+ wc->status);
+ nvmet_rdma_error_comp(queue);
+ }
+ return;
+ }
+
+ /*
+ * Upon RDMA completion check the signature status
+ * - if succeeded send good NVMe response
+ * - if failed send bad NVMe response with appropriate error
+ */
+ status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
+ if (unlikely(status))
+ rsp->req.cqe->status = cpu_to_le16(status << 1);
+ nvmet_rdma_rw_ctx_destroy(rsp);
+
+ if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
+ pr_err("sending cmd response failed\n");
+ nvmet_rdma_release_rsp(rsp);
+ }
}
static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
@@ -665,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp)
static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
struct nvme_keyed_sgl_desc *sgl, bool invalidate)
{
- struct rdma_cm_id *cm_id = rsp->queue->cm_id;
u64 addr = le64_to_cpu(sgl->addr);
u32 key = get_unaligned_le32(sgl->key);
+ struct ib_sig_attrs sig_attrs;
int ret;
rsp->req.transfer_len = get_unaligned_le24(sgl->length);
@@ -676,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
if (!rsp->req.transfer_len)
return 0;
- ret = nvmet_req_alloc_sgl(&rsp->req);
+ if (rsp->req.metadata_len)
+ nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
+
+ ret = nvmet_req_alloc_sgls(&rsp->req);
if (unlikely(ret < 0))
goto error_out;
- ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
- rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
- nvmet_data_dir(&rsp->req));
+ ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
if (unlikely(ret < 0))
goto error_out;
rsp->n_rdma += ret;
@@ -845,23 +1059,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc)
nvmet_rdma_handle_command(queue, rsp);
}
-static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev)
+static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
{
- if (!ndev->srq)
+ nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
+ false);
+ ib_destroy_srq(nsrq->srq);
+
+ kfree(nsrq);
+}
+
+static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i;
+
+ if (!ndev->srqs)
return;
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
- ib_destroy_srq(ndev->srq);
+ for (i = 0; i < ndev->srq_count; i++)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+
+ kfree(ndev->srqs);
}
-static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
+static struct nvmet_rdma_srq *
+nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
{
struct ib_srq_init_attr srq_attr = { NULL, };
+ size_t srq_size = ndev->srq_size;
+ struct nvmet_rdma_srq *nsrq;
struct ib_srq *srq;
- size_t srq_size;
int ret, i;
- srq_size = 4095; /* XXX: tune */
+ nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
+ if (!nsrq)
+ return ERR_PTR(-ENOMEM);
srq_attr.attr.max_wr = srq_size;
srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
@@ -869,35 +1100,73 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
srq_attr.srq_type = IB_SRQT_BASIC;
srq = ib_create_srq(ndev->pd, &srq_attr);
if (IS_ERR(srq)) {
- /*
- * If SRQs aren't supported we just go ahead and use normal
- * non-shared receive queues.
- */
- pr_info("SRQ requested but not supported.\n");
- return 0;
+ ret = PTR_ERR(srq);
+ goto out_free;
}
- ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
- if (IS_ERR(ndev->srq_cmds)) {
- ret = PTR_ERR(ndev->srq_cmds);
+ nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
+ if (IS_ERR(nsrq->cmds)) {
+ ret = PTR_ERR(nsrq->cmds);
goto out_destroy_srq;
}
- ndev->srq = srq;
- ndev->srq_size = srq_size;
+ nsrq->srq = srq;
+ nsrq->ndev = ndev;
for (i = 0; i < srq_size; i++) {
- ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
+ nsrq->cmds[i].nsrq = nsrq;
+ ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
if (ret)
goto out_free_cmds;
}
- return 0;
+ return nsrq;
out_free_cmds:
- nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
+ nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
out_destroy_srq:
ib_destroy_srq(srq);
+out_free:
+ kfree(nsrq);
+ return ERR_PTR(ret);
+}
+
+static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
+{
+ int i, ret;
+
+ if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
+ /*
+ * If SRQs aren't supported we just go ahead and use normal
+ * non-shared receive queues.
+ */
+ pr_info("SRQ requested but not supported.\n");
+ return 0;
+ }
+
+ ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
+ nvmet_rdma_srq_size);
+ ndev->srq_count = min(ndev->device->num_comp_vectors,
+ ndev->device->attrs.max_srq);
+
+ ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
+ if (!ndev->srqs)
+ return -ENOMEM;
+
+ for (i = 0; i < ndev->srq_count; i++) {
+ ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
+ if (IS_ERR(ndev->srqs[i])) {
+ ret = PTR_ERR(ndev->srqs[i]);
+ goto err_srq;
+ }
+ }
+
+ return 0;
+
+err_srq:
+ while (--i >= 0)
+ nvmet_rdma_destroy_srq(ndev->srqs[i]);
+ kfree(ndev->srqs);
return ret;
}
@@ -910,7 +1179,7 @@ static void nvmet_rdma_free_dev(struct kref *ref)
list_del(&ndev->entry);
mutex_unlock(&device_list_mutex);
- nvmet_rdma_destroy_srq(ndev);
+ nvmet_rdma_destroy_srqs(ndev);
ib_dealloc_pd(ndev->pd);
kfree(ndev);
@@ -957,7 +1226,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
goto out_free_dev;
if (nvmet_rdma_use_srq) {
- ret = nvmet_rdma_init_srq(ndev);
+ ret = nvmet_rdma_init_srqs(ndev);
if (ret)
goto out_free_pd;
}
@@ -981,14 +1250,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
{
struct ib_qp_init_attr qp_attr;
struct nvmet_rdma_device *ndev = queue->dev;
- int comp_vector, nr_cqe, ret, i, factor;
-
- /*
- * Spread the io queues across completion vectors,
- * but still keep all admin queues on vector 0.
- */
- comp_vector = !queue->host_qid ? 0 :
- queue->idx % ndev->device->num_comp_vectors;
+ int nr_cqe, ret, i, factor;
/*
* Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
@@ -996,7 +1258,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
queue->cq = ib_alloc_cq(ndev->device, queue,
- nr_cqe + 1, comp_vector,
+ nr_cqe + 1, queue->comp_vector,
IB_POLL_WORKQUEUE);
if (IS_ERR(queue->cq)) {
ret = PTR_ERR(queue->cq);
@@ -1020,14 +1282,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
ndev->device->attrs.max_send_sge);
- if (ndev->srq) {
- qp_attr.srq = ndev->srq;
+ if (queue->nsrq) {
+ qp_attr.srq = queue->nsrq->srq;
} else {
/* +1 for drain */
qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
}
+ if (queue->port->pi_enable && queue->host_qid)
+ qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
+
ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
if (ret) {
pr_err("failed to create_qp ret= %d\n", ret);
@@ -1041,7 +1306,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
__func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
qp_attr.cap.max_send_wr, queue->cm_id);
- if (!ndev->srq) {
+ if (!queue->nsrq) {
for (i = 0; i < queue->recv_queue_size; i++) {
queue->cmds[i].queue = queue;
ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
@@ -1076,7 +1341,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
nvmet_sq_destroy(&queue->nvme_sq);
nvmet_rdma_destroy_queue_ib(queue);
- if (!queue->dev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1146,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
+ struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_queue *queue;
int ret;
@@ -1172,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
queue->dev = ndev;
queue->cm_id = cm_id;
+ queue->port = port->nport;
spin_lock_init(&queue->state_lock);
queue->state = NVMET_RDMA_Q_CONNECTING;
@@ -1188,13 +1455,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
goto out_destroy_sq;
}
+ /*
+ * Spread the io queues across completion vectors,
+ * but still keep all admin queues on vector 0.
+ */
+ queue->comp_vector = !queue->host_qid ? 0 :
+ queue->idx % ndev->device->num_comp_vectors;
+
+
ret = nvmet_rdma_alloc_rsps(queue);
if (ret) {
ret = NVME_RDMA_CM_NO_RSC;
goto out_ida_remove;
}
- if (!ndev->srq) {
+ if (ndev->srqs) {
+ queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
+ } else {
queue->cmds = nvmet_rdma_alloc_cmds(ndev,
queue->recv_queue_size,
!queue->host_qid);
@@ -1215,7 +1492,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
return queue;
out_free_cmds:
- if (!ndev->srq) {
+ if (!queue->nsrq) {
nvmet_rdma_free_cmds(queue->dev, queue->cmds,
queue->recv_queue_size,
!queue->host_qid);
@@ -1241,6 +1518,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
case IB_EVENT_COMM_EST:
rdma_notify(queue->cm_id, event->event);
break;
+ case IB_EVENT_QP_LAST_WQE_REACHED:
+ pr_debug("received last WQE reached event for queue=0x%p\n",
+ queue);
+ break;
default:
pr_err("received IB QP event: %s (%d)\n",
ib_event_msg(event->event), event->event);
@@ -1275,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,
static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
struct rdma_cm_event *event)
{
- struct nvmet_rdma_port *port = cm_id->context;
struct nvmet_rdma_device *ndev;
struct nvmet_rdma_queue *queue;
int ret = -EINVAL;
@@ -1291,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
ret = -ENOMEM;
goto put_device;
}
- queue->port = port->nport;
if (queue->host_qid == 0) {
/* Let inflight controller teardown complete */
@@ -1563,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port)
goto out_destroy_id;
}
+ if (port->nport->pi_enable &&
+ !(cm_id->device->attrs.device_cap_flags &
+ IB_DEVICE_INTEGRITY_HANDOVER)) {
+ pr_err("T10-PI is not supported for %pISpcs\n", addr);
+ ret = -EINVAL;
+ goto out_destroy_id;
+ }
+
port->cm_id = cm_id;
return 0;
@@ -1672,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req,
static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
{
+ if (ctrl->pi_support)
+ return NVMET_RDMA_MAX_METADATA_MDTS;
return NVMET_RDMA_MAX_MDTS;
}
@@ -1680,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
.type = NVMF_TRTYPE_RDMA,
.msdbd = 1,
.has_keyed_sgls = 1,
+ .metadata_support = 1,
.add_port = nvmet_rdma_add_port,
.remove_port = nvmet_rdma_remove_port,
.queue_response = nvmet_rdma_queue_response,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f4..6f557db0320d 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue)
kernel_sock_shutdown(queue->sock, SHUT_RDWR);
}
+static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status)
+{
+ if (status == -EPIPE || status == -ECONNRESET)
+ kernel_sock_shutdown(queue->sock, SHUT_RDWR);
+ else
+ nvmet_tcp_fatal_error(queue);
+}
+
static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
{
struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl;
@@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd)
ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu),
offset_in_page(cmd->data_pdu) + cmd->offset,
- left, MSG_DONTWAIT | MSG_MORE);
+ left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST);
if (ret <= 0)
return ret;
@@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if ((!last_in_batch && cmd->queue->send_list_len) ||
cmd->wbytes_done + left < cmd->req.transfer_len ||
queue->data_digest || !queue->nvme_sq.sqhd_disabled)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset,
left, flags);
@@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
int ret;
if (!last_in_batch && cmd->queue->send_list_len)
- flags |= MSG_MORE;
+ flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST;
else
flags |= MSG_EOR;
@@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch)
if (!last_in_batch && cmd->queue->send_list_len)
msg.msg_flags |= MSG_MORE;
+ else
+ msg.msg_flags |= MSG_EOR;
ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len);
if (unlikely(ret <= 0))
@@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_send_one(queue, i == budget - 1);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*sends)++;
}
-
+done:
return ret;
}
@@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue,
for (i = 0; i < budget; i++) {
ret = nvmet_tcp_try_recv_one(queue);
- if (ret <= 0)
+ if (unlikely(ret < 0)) {
+ nvmet_tcp_socket_error(queue, ret);
+ goto done;
+ } else if (ret == 0) {
break;
+ }
(*recvs)++;
}
-
+done:
return ret;
}
@@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w)
pending = false;
ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops);
- if (ret > 0) {
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops);
- if (ret > 0) {
- /* transmitted message/data */
+ if (ret > 0)
pending = true;
- } else if (ret < 0) {
- if (ret == -EPIPE || ret == -ECONNRESET)
- kernel_sock_shutdown(queue->sock, SHUT_RDWR);
- else
- nvmet_tcp_fatal_error(queue);
+ else if (ret < 0)
return;
- }
} while (pending && ops < NVMET_TCP_IO_WORK_BUDGET);
diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h
index e645caa882dd..0458046d6501 100644
--- a/drivers/nvme/target/trace.h
+++ b/drivers/nvme/target/trace.h
@@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete,
);
+#define aer_name(aer) { aer, #aer }
+
+TRACE_EVENT(nvmet_async_event,
+ TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result),
+ TP_ARGS(ctrl, result),
+ TP_STRUCT__entry(
+ __field(int, ctrl_id)
+ __field(u32, result)
+ ),
+ TP_fast_assign(
+ __entry->ctrl_id = ctrl->cntlid;
+ __entry->result = (le32_to_cpu(result) & 0xff00) >> 8;
+ ),
+ TP_printk("nvmet%d: NVME_AEN=%#08x [%s]",
+ __entry->ctrl_id, __entry->result,
+ __print_symbolic(__entry->result,
+ aer_name(NVME_AER_NOTICE_NS_CHANGED),
+ aer_name(NVME_AER_NOTICE_ANA),
+ aer_name(NVME_AER_NOTICE_FW_ACT_STARTING),
+ aer_name(NVME_AER_NOTICE_DISC_CHANGED),
+ aer_name(NVME_AER_ERROR),
+ aer_name(NVME_AER_SMART),
+ aer_name(NVME_AER_CSS),
+ aer_name(NVME_AER_VS))
+ )
+);
+#undef aer_name
+
#endif /* _TRACE_NVMET_H */
#undef TRACE_INCLUDE_PATH
diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c
index 9a5f3add325f..777734d1b4e5 100644
--- a/drivers/s390/block/dasd_ioctl.c
+++ b/drivers/s390/block/dasd_ioctl.c
@@ -22,6 +22,7 @@
#include <asm/schid.h>
#include <asm/cmb.h>
#include <linux/uaccess.h>
+#include <linux/dasd_mod.h>
/* This is ugly... */
#define PRINTK_HEADER "dasd_ioctl:"
@@ -457,10 +458,9 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp)
/*
* Return dasd information. Used for BIODASDINFO and BIODASDINFO2.
*/
-static int dasd_ioctl_information(struct dasd_block *block,
- unsigned int cmd, void __user *argp)
+static int __dasd_ioctl_information(struct dasd_block *block,
+ struct dasd_information2_t *dasd_info)
{
- struct dasd_information2_t *dasd_info;
struct subchannel_id sch_id;
struct ccw_dev_id dev_id;
struct dasd_device *base;
@@ -473,15 +473,9 @@ static int dasd_ioctl_information(struct dasd_block *block,
if (!base->discipline || !base->discipline->fill_info)
return -EINVAL;
- dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL);
- if (dasd_info == NULL)
- return -ENOMEM;
-
rc = base->discipline->fill_info(base, dasd_info);
- if (rc) {
- kfree(dasd_info);
+ if (rc)
return rc;
- }
cdev = base->cdev;
ccw_device_get_id(cdev, &dev_id);
@@ -520,15 +514,24 @@ static int dasd_ioctl_information(struct dasd_block *block,
list_for_each(l, &base->ccw_queue)
dasd_info->chanq_len++;
spin_unlock_irqrestore(&block->queue_lock, flags);
+ return 0;
+}
- rc = 0;
- if (copy_to_user(argp, dasd_info,
- ((cmd == (unsigned int) BIODASDINFO2) ?
- sizeof(struct dasd_information2_t) :
- sizeof(struct dasd_information_t))))
- rc = -EFAULT;
+static int dasd_ioctl_information(struct dasd_block *block, void __user *argp,
+ size_t copy_size)
+{
+ struct dasd_information2_t *dasd_info;
+ int error;
+
+ dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL);
+ if (!dasd_info)
+ return -ENOMEM;
+
+ error = __dasd_ioctl_information(block, dasd_info);
+ if (!error && copy_to_user(argp, dasd_info, copy_size))
+ error = -EFAULT;
kfree(dasd_info);
- return rc;
+ return error;
}
/*
@@ -622,10 +625,12 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
rc = dasd_ioctl_check_format(bdev, argp);
break;
case BIODASDINFO:
- rc = dasd_ioctl_information(block, cmd, argp);
+ rc = dasd_ioctl_information(block, argp,
+ sizeof(struct dasd_information_t));
break;
case BIODASDINFO2:
- rc = dasd_ioctl_information(block, cmd, argp);
+ rc = dasd_ioctl_information(block, argp,
+ sizeof(struct dasd_information2_t));
break;
case BIODASDPRRD:
rc = dasd_ioctl_read_profile(block, argp);
@@ -660,3 +665,36 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode,
dasd_put_device(base);
return rc;
}
+
+
+/**
+ * dasd_biodasdinfo() - fill out the dasd information structure
+ * @disk [in]: pointer to gendisk structure that references a DASD
+ * @info [out]: pointer to the dasd_information2_t structure
+ *
+ * Provide access to DASD specific information.
+ * The gendisk structure is checked if it belongs to the DASD driver by
+ * comparing the gendisk->fops pointer.
+ * If it does not belong to the DASD driver -EINVAL is returned.
+ * Otherwise the provided dasd_information2_t structure is filled out.
+ *
+ * Returns:
+ * %0 on success and a negative error value on failure.
+ */
+int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info)
+{
+ struct dasd_device *base;
+ int error;
+
+ if (disk->fops != &dasd_device_operations)
+ return -EINVAL;
+
+ base = dasd_device_from_gendisk(disk);
+ if (!base)
+ return -ENODEV;
+ error = __dasd_ioctl_information(base->block, info);
+ dasd_put_device(base);
+ return error;
+}
+/* export that symbol_get in partition detection is possible */
+EXPORT_SYMBOL_GPL(dasd_biodasdinfo);
diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h
index 8e2a356911a9..62e96d4fdcc6 100644
--- a/drivers/scsi/lpfc/lpfc.h
+++ b/drivers/scsi/lpfc/lpfc.h
@@ -143,7 +143,7 @@ struct lpfc_dmabuf {
struct lpfc_nvmet_ctxbuf {
struct list_head list;
- struct lpfc_nvmet_rcv_ctx *context;
+ struct lpfc_async_xchg_ctx *context;
struct lpfc_iocbq *iocbq;
struct lpfc_sglq *sglq;
struct work_struct defer_work;
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 1354c141d614..f089867674cb 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -37,8 +37,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -48,7 +46,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_version.h"
#include "lpfc_compat.h"
diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h
index 76dc8d9493d2..9ee6b930a655 100644
--- a/drivers/scsi/lpfc/lpfc_crtn.h
+++ b/drivers/scsi/lpfc/lpfc_crtn.h
@@ -24,7 +24,6 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *);
struct fc_rport;
struct fc_frame_header;
-struct lpfc_nvmet_rcv_ctx;
void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *);
void lpfc_sli_read_link_ste(struct lpfc_hba *);
void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t);
@@ -564,12 +563,16 @@ void lpfc_nvme_update_localport(struct lpfc_vport *vport);
int lpfc_nvmet_create_targetport(struct lpfc_hba *phba);
int lpfc_nvmet_update_targetport(struct lpfc_hba *phba);
void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba);
-void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba,
- struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb);
+int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg);
+int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg);
void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx,
struct rqb_dmabuf *nvmebuf, uint64_t isr_ts,
uint8_t cqflag);
void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba);
+void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba,
+ struct lpfc_nodelist *ndlp);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
struct lpfc_wcqe_complete *abts_cmpl);
diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c
index 2aa578d20f8c..196f6ae9952e 100644
--- a/drivers/scsi/lpfc/lpfc_ct.c
+++ b/drivers/scsi/lpfc/lpfc_ct.c
@@ -44,7 +44,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
-#include "lpfc_nvme.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_version.h"
diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c
index 5a754fb5f854..4daae90e0c99 100644
--- a/drivers/scsi/lpfc/lpfc_debugfs.c
+++ b/drivers/scsi/lpfc/lpfc_debugfs.c
@@ -39,8 +39,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@@ -1035,7 +1032,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size)
{
struct lpfc_hba *phba = vport->phba;
struct lpfc_nvmet_tgtport *tgtp;
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct nvme_fc_local_port *localport;
struct lpfc_fc4_ctrl_stat *cstat;
struct lpfc_nvme_lport *lport;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index 789eecbf32eb..f5952f8cd4b5 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -36,8 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_nl.h"
@@ -825,6 +823,12 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove)
if ((phba->sli_rev < LPFC_SLI_REV4) &&
(!remove && ndlp->nlp_type & NLP_FABRIC))
continue;
+
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
lpfc_disc_state_machine(vport, ndlp, NULL,
remove
? NLP_EVT_DEVICE_RM
diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c
index 4104bdcdbb6f..ea99483345f2 100644
--- a/drivers/scsi/lpfc/lpfc_init.c
+++ b/drivers/scsi/lpfc/lpfc_init.c
@@ -50,8 +50,6 @@
#include <scsi/scsi_tcq.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -61,7 +59,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
@@ -1032,7 +1029,7 @@ static int
lpfc_hba_down_post_s4(struct lpfc_hba *phba)
{
struct lpfc_io_buf *psb, *psb_next;
- struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next;
+ struct lpfc_async_xchg_ctx *ctxp, *ctxp_next;
struct lpfc_sli4_hdw_queue *qp;
LIST_HEAD(aborts);
LIST_HEAD(nvme_aborts);
@@ -1099,7 +1096,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba)
&nvmet_aborts);
spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) {
- ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP);
+ ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP);
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
}
}
diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c
index 7082279e4c01..726f6619230f 100644
--- a/drivers/scsi/lpfc/lpfc_mem.c
+++ b/drivers/scsi/lpfc/lpfc_mem.c
@@ -31,8 +31,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -41,8 +39,6 @@
#include "lpfc_disc.h"
#include "lpfc.h"
#include "lpfc_scsi.h"
-#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c
index a024e5a3918f..d8501bd959e7 100644
--- a/drivers/scsi/lpfc/lpfc_nportdisc.c
+++ b/drivers/scsi/lpfc/lpfc_nportdisc.c
@@ -32,8 +32,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -491,6 +489,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
(unsigned long long)
wwn_to_u64(sp->portName.u.wwn));
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
ndlp->nlp_prev_state = ndlp->nlp_state;
/* rport needs to be unregistered first */
lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE);
@@ -841,6 +844,12 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL);
else
lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL);
+
+ /* Notify transport of connectivity loss to trigger cleanup. */
+ if (phba->nvmet_support &&
+ ndlp->nlp_state == NLP_STE_UNMAPPED_NODE)
+ lpfc_nvmet_invalidate_host(phba, ndlp);
+
if (ndlp->nlp_DID == Fabric_DID) {
if (vport->port_state <= LPFC_FDISC)
goto out;
diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c
index a45936e08031..b46ba70f78da 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.c
+++ b/drivers/scsi/lpfc/lpfc_nvme.c
@@ -36,9 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme.h>
-#include <linux/nvme-fc-driver.h>
-#include <linux/nvme-fc.h>
#include "lpfc_version.h"
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -396,43 +393,100 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport)
return;
}
-static void
-lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
- struct lpfc_wcqe_complete *wcqe)
+/**
+ * lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request
+ * @phba: pointer to lpfc hba data structure.
+ * @axchg: pointer to exchange context for the NVME LS request
+ *
+ * This routine is used for processing an asychronously received NVME LS
+ * request. Any remaining validation is done and the LS is then forwarded
+ * to the nvme-fc transport via nvme_fc_rcv_ls_req().
+ *
+ * The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing)
+ * -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done.
+ * __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg.
+ *
+ * Returns 0 if LS was handled and delivered to the transport
+ * Returns 1 if LS failed to be handled and should be dropped
+ */
+int
+lpfc_nvme_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg)
{
- struct lpfc_vport *vport = cmdwqe->vport;
+#if (IS_ENABLED(CONFIG_NVME_FC))
+ struct lpfc_vport *vport;
+ struct lpfc_nvme_rport *lpfc_rport;
+ struct nvme_fc_remote_port *remoteport;
struct lpfc_nvme_lport *lport;
- uint32_t status;
+ uint32_t *payload = axchg->payload;
+ int rc;
+
+ vport = axchg->ndlp->vport;
+ lpfc_rport = axchg->ndlp->nrport;
+ if (!lpfc_rport)
+ return -EINVAL;
+
+ remoteport = lpfc_rport->remoteport;
+ if (!vport->localport)
+ return -EINVAL;
+
+ lport = vport->localport->private;
+ if (!lport)
+ return -EINVAL;
+
+ rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload,
+ axchg->size);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x "
+ "%08x %08x %08x\n",
+ axchg->size, rc,
+ *payload, *(payload+1), *(payload+2),
+ *(payload+3), *(payload+4), *(payload+5));
+
+ if (!rc)
+ return 0;
+#endif
+ return 1;
+}
+
+/**
+ * __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME
+ * LS request.
+ * @phba: Pointer to HBA context object
+ * @vport: The local port that issued the LS
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * This function is the generic completion handler for NVME LS requests.
+ * The function updates any states and statistics, calls the transport
+ * ls_req done() routine, then tears down the command and buffers used
+ * for the LS request.
+ **/
+void
+__lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
struct nvmefc_ls_req *pnvme_lsreq;
struct lpfc_dmabuf *buf_ptr;
struct lpfc_nodelist *ndlp;
+ uint32_t status;
pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2;
+ ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
- if (vport->localport) {
- lport = (struct lpfc_nvme_lport *)vport->localport->private;
- if (lport) {
- atomic_inc(&lport->fc4NvmeLsCmpls);
- if (status) {
- if (bf_get(lpfc_wcqe_c_xb, wcqe))
- atomic_inc(&lport->cmpl_ls_xb);
- atomic_inc(&lport->cmpl_ls_err);
- }
- }
- }
-
- ndlp = (struct lpfc_nodelist *)cmdwqe->context1;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
- "6047 nvme cmpl Enter "
- "Data %px DID %x Xri: %x status %x reason x%x "
- "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n",
+ "6047 NVMEx LS REQ %px cmpl DID %x Xri: %x "
+ "status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px "
+ "ndlp:x%px\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status,
(wcqe->parameter & 0xffff),
cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp);
- lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n",
+ lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n",
cmdwqe->sli4_xritag, status, wcqe->parameter);
if (cmdwqe->context3) {
@@ -445,7 +499,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
pnvme_lsreq->done(pnvme_lsreq, status);
else
lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6046 nvme cmpl without done call back? "
+ "6046 NVMEx cmpl without done call back? "
"Data %px DID %x Xri: %x status %x\n",
pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0,
cmdwqe->sli4_xritag, status);
@@ -456,6 +510,31 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_sli_release_iocbq(phba, cmdwqe);
}
+static void
+lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ struct lpfc_vport *vport = cmdwqe->vport;
+ struct lpfc_nvme_lport *lport;
+ uint32_t status;
+
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+
+ if (vport->localport) {
+ lport = (struct lpfc_nvme_lport *)vport->localport->private;
+ if (lport) {
+ atomic_inc(&lport->fc4NvmeLsCmpls);
+ if (status) {
+ if (bf_get(lpfc_wcqe_c_xb, wcqe))
+ atomic_inc(&lport->cmpl_ls_xb);
+ atomic_inc(&lport->cmpl_ls_err);
+ }
+ }
+ }
+
+ __lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe);
+}
+
static int
lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
struct lpfc_dmabuf *inp,
@@ -557,13 +636,6 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
/* Issue GEN REQ WQE for NPORT <did> */
- lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS,
- "6050 Issue GEN REQ WQE to NPORT x%x "
- "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px "
- "xmit:%d 1st:%d\n",
- ndlp->nlp_DID, genwqe->iotag,
- vport->port_state,
- genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
genwqe->wqe_cmpl = cmpl;
genwqe->iocb_cmpl = NULL;
genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT;
@@ -575,105 +647,108 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp,
rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe);
if (rc) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS,
+ lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS,
"6045 Issue GEN REQ WQE to NPORT x%x "
- "Data: x%x x%x\n",
+ "Data: x%x x%x rc x%x\n",
ndlp->nlp_DID, genwqe->iotag,
- vport->port_state);
+ vport->port_state, rc);
lpfc_sli_release_iocbq(phba, genwqe);
return 1;
}
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS,
+ "6050 Issue GEN REQ WQE to NPORT x%x "
+ "Data: oxid: x%x state: x%x wq:x%px lsreq:x%px "
+ "bmp:x%px xmit:%d 1st:%d\n",
+ ndlp->nlp_DID, genwqe->sli4_xritag,
+ vport->port_state,
+ genwqe, pnvme_lsreq, bmp, xmit_len, first_len);
return 0;
}
+
/**
- * lpfc_nvme_ls_req - Issue an Link Service request
- * @lpfc_pnvme: Pointer to the driver's nvme instance data
- * @lpfc_nvme_lport: Pointer to the driver's local port data
- * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
+ * __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request
+ * @vport: The local port issuing the LS
+ * @ndlp: The remote port to send the LS to
+ * @pnvme_lsreq: Pointer to LS request structure from the transport
*
- * Driver registers this routine to handle any link service request
- * from the nvme_fc transport to a remote nvme-aware port.
+ * Routine validates the ndlp, builds buffers and sends a GEN_REQUEST
+ * WQE to perform the LS operation.
*
* Return value :
* 0 - Success
- * TODO: What are the failure codes.
+ * non-zero: various error codes, in form of -Exxx
**/
-static int
-lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
- struct nvme_fc_remote_port *pnvme_rport,
- struct nvmefc_ls_req *pnvme_lsreq)
+int
+__lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq,
+ void (*gen_req_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe))
{
- int ret = 0;
- struct lpfc_nvme_lport *lport;
- struct lpfc_nvme_rport *rport;
- struct lpfc_vport *vport;
- struct lpfc_nodelist *ndlp;
- struct ulp_bde64 *bpl;
struct lpfc_dmabuf *bmp;
+ struct ulp_bde64 *bpl;
+ int ret;
uint16_t ntype, nstate;
- /* there are two dma buf in the request, actually there is one and
- * the second one is just the start address + cmd size.
- * Before calling lpfc_nvme_gen_req these buffers need to be wrapped
- * in a lpfc_dmabuf struct. When freeing we just free the wrapper
- * because the nvem layer owns the data bufs.
- * We do not have to break these packets open, we don't care what is in
- * them. And we do not have to look at the resonse data, we only care
- * that we got a response. All of the caring is going to happen in the
- * nvme-fc layer.
- */
-
- lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
- rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
- if (unlikely(!lport) || unlikely(!rport))
- return -EINVAL;
-
- vport = lport->vport;
-
- if (vport->load_flag & FC_UNLOADING)
- return -ENODEV;
-
- /* Need the ndlp. It is stored in the driver's rport. */
- ndlp = rport->ndlp;
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
- "6051 Remoteport x%px, rport has invalid ndlp. "
- "Failing LS Req\n", pnvme_rport);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
+ "6051 NVMEx LS REQ: Bad NDLP x%px, Failing "
+ "LS Req\n",
+ ndlp);
return -ENODEV;
}
- /* The remote node has to be a mapped nvme target or an
- * unmapped nvme initiator or it's an error.
- */
ntype = ndlp->nlp_type;
nstate = ndlp->nlp_state;
if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) ||
(ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR,
- "6088 DID x%06x not ready for "
- "IO. State x%x, Type x%x\n",
- pnvme_rport->port_id,
- ndlp->nlp_state, ndlp->nlp_type);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR,
+ "6088 NVMEx LS REQ: Fail DID x%06x not "
+ "ready for IO. Type x%x, State x%x\n",
+ ndlp->nlp_DID, ntype, nstate);
return -ENODEV;
}
- bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL);
+
+ /*
+ * there are two dma buf in the request, actually there is one and
+ * the second one is just the start address + cmd size.
+ * Before calling lpfc_nvme_gen_req these buffers need to be wrapped
+ * in a lpfc_dmabuf struct. When freeing we just free the wrapper
+ * because the nvem layer owns the data bufs.
+ * We do not have to break these packets open, we don't care what is
+ * in them. And we do not have to look at the resonse data, we only
+ * care that we got a response. All of the caring is going to happen
+ * in the nvme-fc layer.
+ */
+
+ bmp = kmalloc(sizeof(*bmp), GFP_KERNEL);
if (!bmp) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6044 Could not find node for DID %x\n",
- pnvme_rport->port_id);
- return 2;
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6044 NVMEx LS REQ: Could not alloc LS buf "
+ "for DID %x\n",
+ ndlp->nlp_DID);
+ return -ENOMEM;
}
- INIT_LIST_HEAD(&bmp->list);
+
bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys));
if (!bmp->virt) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6042 Could not find node for DID %x\n",
- pnvme_rport->port_id);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6042 NVMEx LS REQ: Could not alloc mbuf "
+ "for DID %x\n",
+ ndlp->nlp_DID);
kfree(bmp);
- return 3;
+ return -ENOMEM;
}
+
+ INIT_LIST_HEAD(&bmp->list);
+
bpl = (struct ulp_bde64 *)bmp->virt;
bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma));
bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma));
@@ -688,118 +763,206 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
bpl->tus.f.bdeSize = pnvme_lsreq->rsplen;
bpl->tus.w = le32_to_cpu(bpl->tus.w);
- /* Expand print to include key fields. */
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC,
- "6149 Issue LS Req to DID 0x%06x lport x%px, "
- "rport x%px lsreq x%px rqstlen:%d rsplen:%d "
- "%pad %pad\n",
- ndlp->nlp_DID, pnvme_lport, pnvme_rport,
- pnvme_lsreq, pnvme_lsreq->rqstlen,
- pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
- &pnvme_lsreq->rspdma);
-
- atomic_inc(&lport->fc4NvmeLsRequests);
+ "6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, "
+ "rqstlen:%d rsplen:%d %pad %pad\n",
+ ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen,
+ pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
+ &pnvme_lsreq->rspdma);
- /* Hardcode the wait to 30 seconds. Connections are failing otherwise.
- * This code allows it all to work.
- */
ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr,
- pnvme_lsreq, lpfc_nvme_cmpl_gen_req,
- ndlp, 2, 30, 0);
+ pnvme_lsreq, gen_req_cmp, ndlp, 2,
+ LPFC_NVME_LS_TIMEOUT, 0);
if (ret != WQE_SUCCESS) {
- atomic_inc(&lport->xmt_ls_err);
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC,
- "6052 EXIT. issue ls wqe failed lport x%px, "
- "rport x%px lsreq x%px Status %x DID %x\n",
- pnvme_lport, pnvme_rport, pnvme_lsreq,
- ret, ndlp->nlp_DID);
+ lpfc_printf_vlog(vport, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6052 NVMEx REQ: EXIT. issue ls wqe failed "
+ "lsreq x%px Status %x DID %x\n",
+ pnvme_lsreq, ret, ndlp->nlp_DID);
lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys);
kfree(bmp);
- return ret;
+ return -EIO;
}
- /* Stub in routine and return 0 for now. */
- return ret;
+ return 0;
}
/**
- * lpfc_nvme_ls_abort - Issue an Link Service request
- * @lpfc_pnvme: Pointer to the driver's nvme instance data
- * @lpfc_nvme_lport: Pointer to the driver's local port data
- * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq
+ * lpfc_nvme_ls_req - Issue an NVME Link Service request
+ * @lpfc_nvme_lport: Transport localport that LS is to be issued from.
+ * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
+ * @pnvme_lsreq - the transport nvme_ls_req structure for the LS
*
* Driver registers this routine to handle any link service request
* from the nvme_fc transport to a remote nvme-aware port.
*
* Return value :
* 0 - Success
- * TODO: What are the failure codes.
+ * non-zero: various error codes, in form of -Exxx
**/
-static void
-lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
- struct nvme_fc_remote_port *pnvme_rport,
- struct nvmefc_ls_req *pnvme_lsreq)
+static int
+lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport,
+ struct nvme_fc_remote_port *pnvme_rport,
+ struct nvmefc_ls_req *pnvme_lsreq)
{
struct lpfc_nvme_lport *lport;
+ struct lpfc_nvme_rport *rport;
struct lpfc_vport *vport;
- struct lpfc_hba *phba;
- struct lpfc_nodelist *ndlp;
- LIST_HEAD(abort_list);
- struct lpfc_sli_ring *pring;
- struct lpfc_iocbq *wqe, *next_wqe;
+ int ret;
lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
- if (unlikely(!lport))
- return;
- vport = lport->vport;
- phba = vport->phba;
+ rport = (struct lpfc_nvme_rport *)pnvme_rport->private;
+ if (unlikely(!lport) || unlikely(!rport))
+ return -EINVAL;
+ vport = lport->vport;
if (vport->load_flag & FC_UNLOADING)
- return;
+ return -ENODEV;
+
+ atomic_inc(&lport->fc4NvmeLsRequests);
+
+ ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq,
+ lpfc_nvme_ls_req_cmp);
+ if (ret)
+ atomic_inc(&lport->xmt_ls_err);
+
+ return ret;
+}
+
+/**
+ * __lpfc_nvme_ls_abort - Generic service routine to abort a prior
+ * NVME LS request
+ * @vport: The local port that issued the LS
+ * @ndlp: The remote port the LS was sent to
+ * @pnvme_lsreq: Pointer to LS request structure from the transport
+ *
+ * The driver validates the ndlp, looks for the LS, and aborts the
+ * LS if found.
+ *
+ * Returns:
+ * 0 : if LS found and aborted
+ * non-zero: various error conditions in form -Exxx
+ **/
+int
+__lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_hba *phba = vport->phba;
+ struct lpfc_sli_ring *pring;
+ struct lpfc_iocbq *wqe, *next_wqe;
+ bool foundit = false;
- ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
if (!ndlp) {
- lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS,
- "6049 Could not find node for DID %x\n",
- pnvme_rport->port_id);
- return;
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NODE |
+ LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6049 NVMEx LS REQ Abort: Bad NDLP x%px DID "
+ "x%06x, Failing LS Req\n",
+ ndlp, ndlp ? ndlp->nlp_DID : 0);
+ return -EINVAL;
}
- /* Expand print to include key fields. */
- lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
- "6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d "
- "rsplen:%d %pad %pad\n",
- pnvme_lport, pnvme_rport,
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
+ "6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq "
+ "x%p rqstlen:%d rsplen:%d %pad %pad\n",
pnvme_lsreq, pnvme_lsreq->rqstlen,
pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma,
&pnvme_lsreq->rspdma);
/*
- * Lock the ELS ring txcmplq and build a local list of all ELS IOs
- * that need an ABTS. The IOs need to stay on the txcmplq so that
- * the abort operation completes them successfully.
+ * Lock the ELS ring txcmplq and look for the wqe that matches
+ * this ELS. If found, issue an abort on the wqe.
*/
pring = phba->sli4_hba.nvmels_wq->pring;
spin_lock_irq(&phba->hbalock);
spin_lock(&pring->ring_lock);
list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) {
- /* Add to abort_list on on NDLP match. */
- if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) {
+ if (wqe->context2 == pnvme_lsreq) {
wqe->iocb_flag |= LPFC_DRIVER_ABORTED;
- list_add_tail(&wqe->dlist, &abort_list);
+ foundit = true;
+ break;
}
}
spin_unlock(&pring->ring_lock);
- spin_unlock_irq(&phba->hbalock);
- /* Abort the targeted IOs and remove them from the abort list. */
- list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) {
- atomic_inc(&lport->xmt_ls_abort);
- spin_lock_irq(&phba->hbalock);
- list_del_init(&wqe->dlist);
+ if (foundit)
lpfc_sli_issue_abort_iotag(phba, pring, wqe);
- spin_unlock_irq(&phba->hbalock);
+ spin_unlock_irq(&phba->hbalock);
+
+ if (foundit)
+ return 0;
+
+ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS,
+ "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n",
+ pnvme_lsreq);
+ return -EINVAL;
+}
+
+static int
+lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport,
+ struct nvme_fc_remote_port *remoteport,
+ struct nvmefc_ls_rsp *ls_rsp)
+{
+ struct lpfc_async_xchg_ctx *axchg =
+ container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp);
+ struct lpfc_nvme_lport *lport;
+ int rc;
+
+ if (axchg->phba->pport->load_flag & FC_UNLOADING)
+ return -ENODEV;
+
+ lport = (struct lpfc_nvme_lport *)localport->private;
+
+ rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp);
+
+ if (rc) {
+ /*
+ * unless the failure is due to having already sent
+ * the response, an abort will be generated for the
+ * exchange if the rsp can't be sent.
+ */
+ if (rc != -EALREADY)
+ atomic_inc(&lport->xmt_ls_abort);
+ return rc;
}
+
+ return 0;
+}
+
+/**
+ * lpfc_nvme_ls_abort - Abort a prior NVME LS request
+ * @lpfc_nvme_lport: Transport localport that LS is to be issued from.
+ * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to.
+ * @pnvme_lsreq - the transport nvme_ls_req structure for the LS
+ *
+ * Driver registers this routine to abort a NVME LS request that is
+ * in progress (from the transports perspective).
+ **/
+static void
+lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport,
+ struct nvme_fc_remote_port *pnvme_rport,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvme_lport *lport;
+ struct lpfc_vport *vport;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+
+ lport = (struct lpfc_nvme_lport *)pnvme_lport->private;
+ if (unlikely(!lport))
+ return;
+ vport = lport->vport;
+ phba = vport->phba;
+
+ if (vport->load_flag & FC_UNLOADING)
+ return;
+
+ ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id);
+
+ ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq);
+ if (!ret)
+ atomic_inc(&lport->xmt_ls_abort);
}
/* Fix up the existing sgls for NVME IO. */
@@ -1911,6 +2074,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = {
.fcp_io = lpfc_nvme_fcp_io_submit,
.ls_abort = lpfc_nvme_ls_abort,
.fcp_abort = lpfc_nvme_fcp_abort,
+ .xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVME_DEFAULT_SEGS,
@@ -2106,6 +2270,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport)
atomic_set(&lport->cmpl_fcp_err, 0);
atomic_set(&lport->cmpl_ls_xb, 0);
atomic_set(&lport->cmpl_ls_err, 0);
+
atomic_set(&lport->fc4NvmeLsRequests, 0);
atomic_set(&lport->fc4NvmeLsCmpls, 0);
}
diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h
index 593c48ff634e..4a4c3f780e1f 100644
--- a/drivers/scsi/lpfc/lpfc_nvme.h
+++ b/drivers/scsi/lpfc/lpfc_nvme.h
@@ -21,6 +21,10 @@
* included with this package. *
********************************************************************/
+#include <linux/nvme.h>
+#include <linux/nvme-fc-driver.h>
+#include <linux/nvme-fc.h>
+
#define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */
#define LPFC_NVME_ERSP_LEN 0x20
@@ -74,3 +78,179 @@ struct lpfc_nvme_rport {
struct lpfc_nvme_fcpreq_priv {
struct lpfc_io_buf *nvme_buf;
};
+
+/*
+ * set NVME LS request timeouts to 30s. It is larger than the 2*R_A_TOV
+ * set by the spec, which appears to have issues with some devices.
+ */
+#define LPFC_NVME_LS_TIMEOUT 30
+
+
+#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
+#define LPFC_NVMET_RQE_MIN_POST 128
+#define LPFC_NVMET_RQE_DEF_POST 512
+#define LPFC_NVMET_RQE_DEF_COUNT 2048
+#define LPFC_NVMET_SUCCESS_LEN 12
+
+#define LPFC_NVMET_MRQ_AUTO 0
+#define LPFC_NVMET_MRQ_MAX 16
+
+#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
+
+/* Used for NVME Target */
+#define LPFC_NVMET_INV_HOST_ACTIVE 1
+
+struct lpfc_nvmet_tgtport {
+ struct lpfc_hba *phba;
+ struct completion *tport_unreg_cmp;
+ atomic_t state; /* tracks nvmet hosthandle invalidation */
+
+ /* Stats counters - lpfc_nvmet_unsol_ls_buffer */
+ atomic_t rcv_ls_req_in;
+ atomic_t rcv_ls_req_out;
+ atomic_t rcv_ls_req_drop;
+ atomic_t xmt_ls_abort;
+ atomic_t xmt_ls_abort_cmpl;
+
+ /* Stats counters - lpfc_nvmet_xmt_ls_rsp */
+ atomic_t xmt_ls_rsp;
+ atomic_t xmt_ls_drop;
+
+ /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
+ atomic_t xmt_ls_rsp_error;
+ atomic_t xmt_ls_rsp_aborted;
+ atomic_t xmt_ls_rsp_xb_set;
+ atomic_t xmt_ls_rsp_cmpl;
+
+ /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
+ atomic_t rcv_fcp_cmd_in;
+ atomic_t rcv_fcp_cmd_out;
+ atomic_t rcv_fcp_cmd_drop;
+ atomic_t rcv_fcp_cmd_defer;
+ atomic_t xmt_fcp_release;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_op */
+ atomic_t xmt_fcp_drop;
+ atomic_t xmt_fcp_read_rsp;
+ atomic_t xmt_fcp_read;
+ atomic_t xmt_fcp_write;
+ atomic_t xmt_fcp_rsp;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
+ atomic_t xmt_fcp_rsp_xb_set;
+ atomic_t xmt_fcp_rsp_cmpl;
+ atomic_t xmt_fcp_rsp_error;
+ atomic_t xmt_fcp_rsp_aborted;
+ atomic_t xmt_fcp_rsp_drop;
+
+ /* Stats counters - lpfc_nvmet_xmt_fcp_abort */
+ atomic_t xmt_fcp_xri_abort_cqe;
+ atomic_t xmt_fcp_abort;
+ atomic_t xmt_fcp_abort_cmpl;
+ atomic_t xmt_abort_sol;
+ atomic_t xmt_abort_unsol;
+ atomic_t xmt_abort_rsp;
+ atomic_t xmt_abort_rsp_error;
+
+ /* Stats counters - defer IO */
+ atomic_t defer_ctx;
+ atomic_t defer_fod;
+ atomic_t defer_wqfull;
+};
+
+struct lpfc_nvmet_ctx_info {
+ struct list_head nvmet_ctx_list;
+ spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
+ struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
+ struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
+ uint16_t nvmet_ctx_list_cnt;
+ char pad[16]; /* pad to a cache-line */
+};
+
+/* This retrieves the context info associated with the specified cpu / mrq */
+#define lpfc_get_ctx_list(phba, cpu, mrq) \
+ (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
+
+/* Values for state field of struct lpfc_async_xchg_ctx */
+#define LPFC_NVME_STE_LS_RCV 1
+#define LPFC_NVME_STE_LS_ABORT 2
+#define LPFC_NVME_STE_LS_RSP 3
+#define LPFC_NVME_STE_RCV 4
+#define LPFC_NVME_STE_DATA 5
+#define LPFC_NVME_STE_ABORT 6
+#define LPFC_NVME_STE_DONE 7
+#define LPFC_NVME_STE_FREE 0xff
+
+/* Values for flag field of struct lpfc_async_xchg_ctx */
+#define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */
+#define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */
+#define LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */
+#define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */
+#define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on exchange */
+#define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
+#define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
+#define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */
+
+struct lpfc_async_xchg_ctx {
+ union {
+ struct nvmefc_tgt_fcp_req fcp_req;
+ } hdlrctx;
+ struct list_head list;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ struct nvmefc_ls_req *ls_req;
+ struct nvmefc_ls_rsp ls_rsp;
+ struct lpfc_iocbq *wqeq;
+ struct lpfc_iocbq *abort_wqeq;
+ spinlock_t ctxlock; /* protect flag access */
+ uint32_t sid;
+ uint32_t offset;
+ uint16_t oxid;
+ uint16_t size;
+ uint16_t entry_cnt;
+ uint16_t cpu;
+ uint16_t idx;
+ uint16_t state;
+ uint16_t flag;
+ void *payload;
+ struct rqb_dmabuf *rqb_buffer;
+ struct lpfc_nvmet_ctxbuf *ctxbuf;
+ struct lpfc_sli4_hdw_queue *hdwq;
+
+#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+ uint64_t ts_isr_cmd;
+ uint64_t ts_cmd_nvme;
+ uint64_t ts_nvme_data;
+ uint64_t ts_data_wqput;
+ uint64_t ts_isr_data;
+ uint64_t ts_data_nvme;
+ uint64_t ts_nvme_status;
+ uint64_t ts_status_wqput;
+ uint64_t ts_isr_status;
+ uint64_t ts_status_nvme;
+#endif
+};
+
+
+/* routines found in lpfc_nvme.c */
+int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp,
+ struct nvmefc_ls_req *pnvme_lsreq,
+ void (*gen_req_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe));
+void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport,
+ struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
+int __lpfc_nvme_ls_abort(struct lpfc_vport *vport,
+ struct lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq);
+
+/* routines found in lpfc_nvmet.c */
+int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp, uint32_t sid,
+ uint16_t xri);
+int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
+ struct nvmefc_ls_rsp *ls_rsp,
+ void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe));
+void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c
index 565419bf8d74..32eb5e873e9b 100644
--- a/drivers/scsi/lpfc/lpfc_nvmet.c
+++ b/drivers/scsi/lpfc/lpfc_nvmet.c
@@ -36,10 +36,6 @@
#include <scsi/scsi_transport_fc.h>
#include <scsi/fc/fc_fs.h>
-#include <linux/nvme.h>
-#include <linux/nvme-fc-driver.h>
-#include <linux/nvme-fc.h>
-
#include "lpfc_version.h"
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
@@ -50,29 +46,25 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_logmsg.h"
#include "lpfc_crtn.h"
#include "lpfc_vport.h"
#include "lpfc_debugfs.h"
static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
dma_addr_t rspbuf,
uint16_t rspsize);
static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *);
+ struct lpfc_async_xchg_ctx *);
static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
uint32_t, uint16_t);
static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
+ struct lpfc_async_xchg_ctx *,
uint32_t, uint16_t);
-static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *,
- struct lpfc_nvmet_rcv_ctx *,
- uint32_t, uint16_t);
static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *,
- struct lpfc_nvmet_rcv_ctx *);
+ struct lpfc_async_xchg_ctx *);
static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *);
static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf);
@@ -221,10 +213,10 @@ lpfc_nvmet_cmd_template(void)
}
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
-static struct lpfc_nvmet_rcv_ctx *
+static struct lpfc_async_xchg_ctx *
lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflag;
bool found = false;
@@ -243,10 +235,10 @@ lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri)
return NULL;
}
-static struct lpfc_nvmet_rcv_ctx *
+static struct lpfc_async_xchg_ctx *
lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflag;
bool found = false;
@@ -267,7 +259,8 @@ lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid)
#endif
static void
-lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
+lpfc_nvmet_defer_release(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp)
{
lockdep_assert_held(&ctxp->ctxlock);
@@ -275,10 +268,10 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
"6313 NVMET Defer ctx release oxid x%x flg x%x\n",
ctxp->oxid, ctxp->flag);
- if (ctxp->flag & LPFC_NVMET_CTX_RLS)
+ if (ctxp->flag & LPFC_NVME_CTX_RLS)
return;
- ctxp->flag |= LPFC_NVMET_CTX_RLS;
+ ctxp->flag |= LPFC_NVME_CTX_RLS;
spin_lock(&phba->sli4_hba.t_active_list_lock);
list_del(&ctxp->list);
spin_unlock(&phba->sli4_hba.t_active_list_lock);
@@ -288,6 +281,53 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
}
/**
+ * __lpfc_nvme_xmt_ls_rsp_cmp - Generic completion handler for the
+ * transmission of an NVME LS response.
+ * @phba: Pointer to HBA context object.
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * The function is called from SLI ring event handler with no
+ * lock held. The function frees memory resources used for the command
+ * used to send the NVME LS RSP.
+ **/
+void
+__lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2;
+ struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp;
+ uint32_t status, result;
+
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+ result = wcqe->parameter;
+
+ if (axchg->state != LPFC_NVME_STE_LS_RSP || axchg->entry_cnt != 2) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6410 NVMEx LS cmpl state mismatch IO x%x: "
+ "%d %d\n",
+ axchg->oxid, axchg->state, axchg->entry_cnt);
+ }
+
+ lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x result x%x\n",
+ axchg->oxid, status, result);
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6038 NVMEx LS rsp cmpl: %d %d oxid x%x\n",
+ status, result, axchg->oxid);
+
+ lpfc_nlp_put(cmdwqe->context1);
+ cmdwqe->context2 = NULL;
+ cmdwqe->context3 = NULL;
+ lpfc_sli_release_iocbq(phba, cmdwqe);
+ ls_rsp->done(ls_rsp);
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
+ "6200 NVMEx LS rsp cmpl done status %d oxid x%x\n",
+ status, axchg->oxid);
+ kfree(axchg);
+}
+
+/**
* lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response
* @phba: Pointer to HBA context object.
* @cmdwqe: Pointer to driver command WQE object.
@@ -295,33 +335,23 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp)
*
* The function is called from SLI ring event handler with no
* lock held. This function is the completion handler for NVME LS commands
- * The function frees memory resources used for the NVME commands.
+ * The function updates any states and statistics, then calls the
+ * generic completion handler to free resources.
**/
static void
lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
struct lpfc_nvmet_tgtport *tgtp;
- struct nvmefc_tgt_ls_req *rsp;
- struct lpfc_nvmet_rcv_ctx *ctxp;
uint32_t status, result;
- status = bf_get(lpfc_wcqe_c_status, wcqe);
- result = wcqe->parameter;
- ctxp = cmdwqe->context2;
-
- if (ctxp->state != LPFC_NVMET_STE_LS_RSP || ctxp->entry_cnt != 2) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6410 NVMET LS cmpl state mismatch IO x%x: "
- "%d %d\n",
- ctxp->oxid, ctxp->state, ctxp->entry_cnt);
- }
-
if (!phba->targetport)
- goto out;
+ goto finish;
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK;
+ result = wcqe->parameter;
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
if (tgtp) {
if (status) {
atomic_inc(&tgtp->xmt_ls_rsp_error);
@@ -334,22 +364,8 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
}
}
-out:
- rsp = &ctxp->ctx.ls_req;
-
- lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n",
- ctxp->oxid, status, result);
-
- lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
- "6038 NVMET LS rsp cmpl: %d %d oxid x%x\n",
- status, result, ctxp->oxid);
-
- lpfc_nlp_put(cmdwqe->context1);
- cmdwqe->context2 = NULL;
- cmdwqe->context3 = NULL;
- lpfc_sli_release_iocbq(phba, cmdwqe);
- rsp->done(rsp);
- kfree(ctxp);
+finish:
+ __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe);
}
/**
@@ -369,7 +385,7 @@ void
lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+ struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct rqb_dmabuf *nvmebuf;
@@ -378,7 +394,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
int cpu;
unsigned long iflag;
- if (ctxp->state == LPFC_NVMET_STE_FREE) {
+ if (ctxp->state == LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6411 NVMET free, already free IO x%x: %d %d\n",
ctxp->oxid, ctxp->state, ctxp->entry_cnt);
@@ -390,8 +406,8 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
/* check if freed in another path whilst acquiring lock */
if (nvmebuf) {
ctxp->rqb_buffer = NULL;
- if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) {
- ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ;
+ if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) {
+ ctxp->flag &= ~LPFC_NVME_CTX_REUSE_WQ;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
nvmebuf->hrq->rqbp->rqb_free_buffer(phba,
nvmebuf);
@@ -404,7 +420,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
}
}
- ctxp->state = LPFC_NVMET_STE_FREE;
+ ctxp->state = LPFC_NVME_STE_FREE;
spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag);
if (phba->sli4_hba.nvmet_io_wait_cnt) {
@@ -421,14 +437,14 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
size = nvmebuf->bytes_recv;
sid = sli4_sid_from_fc_hdr(fc_hdr);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+ ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context;
ctxp->wqeq = NULL;
ctxp->offset = 0;
ctxp->phba = phba;
ctxp->size = size;
ctxp->oxid = oxid;
ctxp->sid = sid;
- ctxp->state = LPFC_NVMET_STE_RCV;
+ ctxp->state = LPFC_NVME_STE_RCV;
ctxp->entry_cnt = 1;
ctxp->flag = 0;
ctxp->ctxbuf = ctx_buf;
@@ -453,7 +469,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
/* Indicate that a replacement buffer has been posted */
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ;
+ ctxp->flag |= LPFC_NVME_CTX_REUSE_WQ;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
if (!queue_work(phba->wq, &ctx_buf->defer_work)) {
@@ -495,7 +511,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf)
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
static void
lpfc_nvmet_ktime(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
uint64_t seg1, seg2, seg3, seg4, seg5;
uint64_t seg6, seg7, seg8, seg9, seg10;
@@ -704,16 +720,16 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
{
struct lpfc_nvmet_tgtport *tgtp;
struct nvmefc_tgt_fcp_req *rsp;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
uint32_t status, result, op, start_clean, logerr;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
int id;
#endif
ctxp = cmdwqe->context2;
- ctxp->flag &= ~LPFC_NVMET_IO_INP;
+ ctxp->flag &= ~LPFC_NVME_IO_INP;
- rsp = &ctxp->ctx.fcp_req;
+ rsp = &ctxp->hdlrctx.fcp_req;
op = rsp->op;
status = bf_get(lpfc_wcqe_c_status, wcqe);
@@ -740,13 +756,13 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
/* pick up SLI4 exhange busy condition */
if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
- ctxp->flag |= LPFC_NVMET_XBUSY;
+ ctxp->flag |= LPFC_NVME_XBUSY;
logerr |= LOG_NVME_ABTS;
if (tgtp)
atomic_inc(&tgtp->xmt_fcp_rsp_xb_set);
} else {
- ctxp->flag &= ~LPFC_NVMET_XBUSY;
+ ctxp->flag &= ~LPFC_NVME_XBUSY;
}
lpfc_printf_log(phba, KERN_INFO, logerr,
@@ -768,7 +784,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
if ((op == NVMET_FCOP_READDATA_RSP) ||
(op == NVMET_FCOP_RSP)) {
/* Sanity check */
- ctxp->state = LPFC_NVMET_STE_DONE;
+ ctxp->state = LPFC_NVME_STE_DONE;
ctxp->entry_cnt++;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -826,17 +842,32 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
#endif
}
-static int
-lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
- struct nvmefc_tgt_ls_req *rsp)
+/**
+ * __lpfc_nvme_xmt_ls_rsp - Generic service routine to issue transmit
+ * an NVME LS rsp for a prior NVME LS request that was received.
+ * @axchg: pointer to exchange context for the NVME LS request the response
+ * is for.
+ * @ls_rsp: pointer to the transport LS RSP that is to be sent
+ * @xmt_ls_rsp_cmp: completion routine to call upon RSP transmit done
+ *
+ * This routine is used to format and send a WQE to transmit a NVME LS
+ * Response. The response is for a prior NVME LS request that was
+ * received and posted to the transport.
+ *
+ * Returns:
+ * 0 : if response successfully transmit
+ * non-zero : if response failed to transmit, of the form -Exxx.
+ **/
+int
+__lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg,
+ struct nvmefc_ls_rsp *ls_rsp,
+ void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba,
+ struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe))
{
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req);
- struct lpfc_hba *phba = ctxp->phba;
- struct hbq_dmabuf *nvmebuf =
- (struct hbq_dmabuf *)ctxp->rqb_buffer;
+ struct lpfc_hba *phba = axchg->phba;
+ struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer;
struct lpfc_iocbq *nvmewqeq;
- struct lpfc_nvmet_tgtport *nvmep = tgtport->private;
struct lpfc_dmabuf dmabuf;
struct ulp_bde64 bpl;
int rc;
@@ -844,34 +875,28 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
if (phba->pport->load_flag & FC_UNLOADING)
return -ENODEV;
- if (phba->pport->load_flag & FC_UNLOADING)
- return -ENODEV;
-
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
- "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid);
+ "6023 NVMEx LS rsp oxid x%x\n", axchg->oxid);
- if ((ctxp->state != LPFC_NVMET_STE_LS_RCV) ||
- (ctxp->entry_cnt != 1)) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6412 NVMET LS rsp state mismatch "
+ if (axchg->state != LPFC_NVME_STE_LS_RCV || axchg->entry_cnt != 1) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6412 NVMEx LS rsp state mismatch "
"oxid x%x: %d %d\n",
- ctxp->oxid, ctxp->state, ctxp->entry_cnt);
+ axchg->oxid, axchg->state, axchg->entry_cnt);
+ return -EALREADY;
}
- ctxp->state = LPFC_NVMET_STE_LS_RSP;
- ctxp->entry_cnt++;
+ axchg->state = LPFC_NVME_STE_LS_RSP;
+ axchg->entry_cnt++;
- nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma,
- rsp->rsplen);
+ nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, axchg, ls_rsp->rspdma,
+ ls_rsp->rsplen);
if (nvmewqeq == NULL) {
- atomic_inc(&nvmep->xmt_ls_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6150 LS Drop IO x%x: Prep\n",
- ctxp->oxid);
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp,
- ctxp->sid, ctxp->oxid);
- return -ENOMEM;
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6150 NVMEx LS Drop Rsp x%x: Prep\n",
+ axchg->oxid);
+ rc = -ENOMEM;
+ goto out_free_buf;
}
/* Save numBdes for bpl2sgl */
@@ -881,39 +906,106 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
dmabuf.virt = &bpl;
bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow;
bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh;
- bpl.tus.f.bdeSize = rsp->rsplen;
+ bpl.tus.f.bdeSize = ls_rsp->rsplen;
bpl.tus.f.bdeFlags = 0;
bpl.tus.w = le32_to_cpu(bpl.tus.w);
+ /*
+ * Note: although we're using stack space for the dmabuf, the
+ * call to lpfc_sli4_issue_wqe is synchronous, so it will not
+ * be referenced after it returns back to this routine.
+ */
- nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp;
+ nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp;
nvmewqeq->iocb_cmpl = NULL;
- nvmewqeq->context2 = ctxp;
+ nvmewqeq->context2 = axchg;
- lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n",
- ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen);
+ lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n",
+ axchg->oxid, nvmewqeq->hba_wqidx, ls_rsp->rsplen);
+
+ rc = lpfc_sli4_issue_wqe(phba, axchg->hdwq, nvmewqeq);
+
+ /* clear to be sure there's no reference */
+ nvmewqeq->context3 = NULL;
- rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
if (rc == WQE_SUCCESS) {
/*
* Okay to repost buffer here, but wait till cmpl
* before freeing ctxp and iocbq.
*/
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_rsp);
return 0;
}
- /* Give back resources */
- atomic_inc(&nvmep->xmt_ls_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6151 LS Drop IO x%x: Issue %d\n",
- ctxp->oxid, rc);
+
+ lpfc_printf_log(phba, KERN_ERR,
+ LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS,
+ "6151 NVMEx LS RSP x%x: failed to transmit %d\n",
+ axchg->oxid, rc);
+
+ rc = -ENXIO;
lpfc_nlp_put(nvmewqeq->context1);
+out_free_buf:
+ /* Give back resources */
lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- atomic_inc(&nvmep->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid);
- return -ENXIO;
+
+ /*
+ * As transport doesn't track completions of responses, if the rsp
+ * fails to send, the transport will effectively ignore the rsp
+ * and consider the LS done. However, the driver has an active
+ * exchange open for the LS - so be sure to abort the exchange
+ * if the response isn't sent.
+ */
+ lpfc_nvme_unsol_ls_issue_abort(phba, axchg, axchg->sid, axchg->oxid);
+ return rc;
+}
+
+/**
+ * lpfc_nvmet_xmt_ls_rsp - Transmit NVME LS response
+ * @tgtport: pointer to target port that NVME LS is to be transmit from.
+ * @ls_rsp: pointer to the transport LS RSP that is to be sent
+ *
+ * Driver registers this routine to transmit responses for received NVME
+ * LS requests.
+ *
+ * This routine is used to format and send a WQE to transmit a NVME LS
+ * Response. The ls_rsp is used to reverse-map the LS to the original
+ * NVME LS request sequence, which provides addressing information for
+ * the remote port the LS to be sent to, as well as the exchange id
+ * that is the LS is bound to.
+ *
+ * Returns:
+ * 0 : if response successfully transmit
+ * non-zero : if response failed to transmit, of the form -Exxx.
+ **/
+static int
+lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport,
+ struct nvmefc_ls_rsp *ls_rsp)
+{
+ struct lpfc_async_xchg_ctx *axchg =
+ container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp);
+ struct lpfc_nvmet_tgtport *nvmep = tgtport->private;
+ int rc;
+
+ if (axchg->phba->pport->load_flag & FC_UNLOADING)
+ return -ENODEV;
+
+ rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp);
+
+ if (rc) {
+ atomic_inc(&nvmep->xmt_ls_drop);
+ /*
+ * unless the failure is due to having already sent
+ * the response, an abort will be generated for the
+ * exchange if the rsp can't be sent.
+ */
+ if (rc != -EALREADY)
+ atomic_inc(&nvmep->xmt_ls_abort);
+ return rc;
+ }
+
+ atomic_inc(&nvmep->xmt_ls_rsp);
+ return 0;
}
static int
@@ -921,8 +1013,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
struct lpfc_queue *wq;
struct lpfc_iocbq *nvmewqeq;
@@ -968,8 +1060,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
#endif
/* Sanity check */
- if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) ||
- (ctxp->state == LPFC_NVMET_STE_ABORT)) {
+ if ((ctxp->flag & LPFC_NVME_ABTS_RCV) ||
+ (ctxp->state == LPFC_NVME_STE_ABORT)) {
atomic_inc(&lpfc_nvmep->xmt_fcp_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6102 IO oxid x%x aborted\n",
@@ -997,7 +1089,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n",
ctxp->oxid, rsp->op, rsp->rsplen);
- ctxp->flag |= LPFC_NVMET_IO_INP;
+ ctxp->flag |= LPFC_NVME_IO_INP;
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
if (rc == WQE_SUCCESS) {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
@@ -1016,7 +1108,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport,
* WQ was full, so queue nvmewqeq to be sent after
* WQE release CQE
*/
- ctxp->flag |= LPFC_NVMET_DEFER_WQFULL;
+ ctxp->flag |= LPFC_NVME_DEFER_WQFULL;
wq = ctxp->hdwq->io_wq;
pring = wq->pring;
spin_lock_irqsave(&pring->ring_lock, iflags);
@@ -1056,8 +1148,8 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *req)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(req, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
struct lpfc_queue *wq;
unsigned long flags;
@@ -1085,13 +1177,13 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
/* Since iaab/iaar are NOT set, we need to check
* if the firmware is in process of aborting IO
*/
- if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) {
+ if (ctxp->flag & (LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP)) {
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return;
}
- ctxp->flag |= LPFC_NVMET_ABORT_OP;
+ ctxp->flag |= LPFC_NVME_ABORT_OP;
- if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) {
+ if (ctxp->flag & LPFC_NVME_DEFER_WQFULL) {
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
ctxp->oxid);
@@ -1101,11 +1193,11 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport,
}
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
- /* An state of LPFC_NVMET_STE_RCV means we have just received
+ /* A state of LPFC_NVME_STE_RCV means we have just received
* the NVME command and have not started processing it.
* (by issuing any IO WQEs on this exchange yet)
*/
- if (ctxp->state == LPFC_NVMET_STE_RCV)
+ if (ctxp->state == LPFC_NVME_STE_RCV)
lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid,
ctxp->oxid);
else
@@ -1118,26 +1210,26 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct lpfc_hba *phba = ctxp->phba;
unsigned long flags;
bool aborting = false;
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_XBUSY)
+ if (ctxp->flag & LPFC_NVME_XBUSY)
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR,
"6027 NVMET release with XBUSY flag x%x"
" oxid x%x\n",
ctxp->flag, ctxp->oxid);
- else if (ctxp->state != LPFC_NVMET_STE_DONE &&
- ctxp->state != LPFC_NVMET_STE_ABORT)
+ else if (ctxp->state != LPFC_NVME_STE_DONE &&
+ ctxp->state != LPFC_NVME_STE_ABORT)
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6413 NVMET release bad state %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
- if ((ctxp->flag & LPFC_NVMET_ABORT_OP) ||
- (ctxp->flag & LPFC_NVMET_XBUSY)) {
+ if ((ctxp->flag & LPFC_NVME_ABORT_OP) ||
+ (ctxp->flag & LPFC_NVME_XBUSY)) {
aborting = true;
/* let the abort path do the real release */
lpfc_nvmet_defer_release(phba, ctxp);
@@ -1148,7 +1240,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport,
ctxp->state, aborting);
atomic_inc(&lpfc_nvmep->xmt_fcp_release);
- ctxp->flag &= ~LPFC_NVMET_TNOTIFY;
+ ctxp->flag &= ~LPFC_NVME_TNOTIFY;
if (aborting)
return;
@@ -1161,8 +1253,8 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
struct nvmefc_tgt_fcp_req *rsp)
{
struct lpfc_nvmet_tgtport *tgtp;
- struct lpfc_nvmet_rcv_ctx *ctxp =
- container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req);
+ struct lpfc_async_xchg_ctx *ctxp =
+ container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req);
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
struct lpfc_hba *phba = ctxp->phba;
unsigned long iflag;
@@ -1190,6 +1282,116 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport,
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
}
+/**
+ * lpfc_nvmet_ls_req_cmp - completion handler for a nvme ls request
+ * @phba: Pointer to HBA context object
+ * @cmdwqe: Pointer to driver command WQE object.
+ * @wcqe: Pointer to driver response CQE object.
+ *
+ * This function is the completion handler for NVME LS requests.
+ * The function updates any states and statistics, then calls the
+ * generic completion handler to finish completion of the request.
+ **/
+static void
+lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
+ struct lpfc_wcqe_complete *wcqe)
+{
+ __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe);
+}
+
+/**
+ * lpfc_nvmet_ls_req - Issue an Link Service request
+ * @targetport - pointer to target instance registered with nvmet transport.
+ * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv.
+ * Driver sets this value to the ndlp pointer.
+ * @pnvme_lsreq - the transport nvme_ls_req structure for the LS
+ *
+ * Driver registers this routine to handle any link service request
+ * from the nvme_fc transport to a remote nvme-aware port.
+ *
+ * Return value :
+ * 0 - Success
+ * non-zero: various error codes, in form of -Exxx
+ **/
+static int
+lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport,
+ void *hosthandle,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+ u32 hstate;
+
+ if (!lpfc_nvmet)
+ return -EINVAL;
+
+ phba = lpfc_nvmet->phba;
+ if (phba->pport->load_flag & FC_UNLOADING)
+ return -EINVAL;
+
+ hstate = atomic_read(&lpfc_nvmet->state);
+ if (hstate == LPFC_NVMET_INV_HOST_ACTIVE)
+ return -EACCES;
+
+ ndlp = (struct lpfc_nodelist *)hosthandle;
+
+ ret = __lpfc_nvme_ls_req(phba->pport, ndlp, pnvme_lsreq,
+ lpfc_nvmet_ls_req_cmp);
+
+ return ret;
+}
+
+/**
+ * lpfc_nvmet_ls_abort - Abort a prior NVME LS request
+ * @targetport: Transport targetport, that LS was issued from.
+ * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv.
+ * Driver sets this value to the ndlp pointer.
+ * @pnvme_lsreq - the transport nvme_ls_req structure for LS to be aborted
+ *
+ * Driver registers this routine to abort an NVME LS request that is
+ * in progress (from the transports perspective).
+ **/
+static void
+lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport,
+ void *hosthandle,
+ struct nvmefc_ls_req *pnvme_lsreq)
+{
+ struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private;
+ struct lpfc_hba *phba;
+ struct lpfc_nodelist *ndlp;
+ int ret;
+
+ phba = lpfc_nvmet->phba;
+ if (phba->pport->load_flag & FC_UNLOADING)
+ return;
+
+ ndlp = (struct lpfc_nodelist *)hosthandle;
+
+ ret = __lpfc_nvme_ls_abort(phba->pport, ndlp, pnvme_lsreq);
+ if (!ret)
+ atomic_inc(&lpfc_nvmet->xmt_ls_abort);
+}
+
+static void
+lpfc_nvmet_host_release(void *hosthandle)
+{
+ struct lpfc_nodelist *ndlp = hosthandle;
+ struct lpfc_hba *phba = NULL;
+ struct lpfc_nvmet_tgtport *tgtp;
+
+ phba = ndlp->phba;
+ if (!phba->targetport || !phba->targetport->private)
+ return;
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME,
+ "6202 NVMET XPT releasing hosthandle x%px\n",
+ hosthandle);
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_set(&tgtp->state, 0);
+}
+
static void
lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport)
{
@@ -1214,6 +1416,9 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.fcp_req_release = lpfc_nvmet_xmt_fcp_release,
.defer_rcv = lpfc_nvmet_defer_rcv,
.discovery_event = lpfc_nvmet_discovery_event,
+ .ls_req = lpfc_nvmet_ls_req,
+ .ls_abort = lpfc_nvmet_ls_abort,
+ .host_release = lpfc_nvmet_host_release,
.max_hw_queues = 1,
.max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS,
@@ -1224,6 +1429,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = {
.target_features = 0,
/* sizes of additional private data for data structures */
.target_priv_sz = sizeof(struct lpfc_nvmet_tgtport),
+ .lsrqst_priv_sz = 0,
};
static void
@@ -1368,7 +1574,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba)
return -ENOMEM;
}
ctx_buf->context->ctxbuf = ctx_buf;
- ctx_buf->context->state = LPFC_NVMET_STE_FREE;
+ ctx_buf->context->state = LPFC_NVME_STE_FREE;
ctx_buf->iocbq = lpfc_sli_get_iocbq(phba);
if (!ctx_buf->iocbq) {
@@ -1568,7 +1774,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri);
uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri);
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct nvmefc_tgt_fcp_req *req = NULL;
struct lpfc_nodelist *ndlp;
@@ -1599,12 +1805,12 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- if (ctxp->flag & LPFC_NVMET_CTX_RLS &&
- !(ctxp->flag & LPFC_NVMET_ABORT_OP)) {
+ if (ctxp->flag & LPFC_NVME_CTX_RLS &&
+ !(ctxp->flag & LPFC_NVME_ABORT_OP)) {
list_del_init(&ctxp->list);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_XBUSY;
+ ctxp->flag &= ~LPFC_NVME_XBUSY;
spin_unlock(&ctxp->ctxlock);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
@@ -1646,15 +1852,15 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba,
rxid);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_ABTS_RCV;
- ctxp->state = LPFC_NVMET_STE_ABORT;
+ ctxp->flag |= LPFC_NVME_ABTS_RCV;
+ ctxp->state = LPFC_NVME_STE_ABORT;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
"NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n",
xri, raw_smp_processor_id(), 0);
- req = &ctxp->ctx.fcp_req;
+ req = &ctxp->hdlrctx.fcp_req;
if (req)
nvmet_fc_rcv_fcp_abort(phba->targetport, req);
}
@@ -1667,7 +1873,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_hba *phba = vport->phba;
- struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp;
+ struct lpfc_async_xchg_ctx *ctxp, *next_ctxp;
struct nvmefc_tgt_fcp_req *rsp;
uint32_t sid;
uint16_t oxid, xri;
@@ -1690,7 +1896,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
spin_unlock_irqrestore(&phba->hbalock, iflag);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= LPFC_NVMET_ABTS_RCV;
+ ctxp->flag |= LPFC_NVME_ABTS_RCV;
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
@@ -1700,7 +1906,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6319 NVMET Rcv ABTS:acc xri x%x\n", xri);
- rsp = &ctxp->ctx.fcp_req;
+ rsp = &ctxp->hdlrctx.fcp_req;
nvmet_fc_rcv_fcp_abort(phba->targetport, rsp);
/* Respond with BA_ACC accordingly */
@@ -1759,7 +1965,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
xri = ctxp->ctxbuf->sglq->sli4_xritag;
spin_lock_irqsave(&ctxp->ctxlock, iflag);
- ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP);
+ ctxp->flag |= (LPFC_NVME_ABTS_RCV | LPFC_NVME_ABORT_OP);
spin_unlock_irqrestore(&ctxp->ctxlock, iflag);
lpfc_nvmeio_data(phba,
@@ -1771,10 +1977,10 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
"flag x%x state x%x\n",
ctxp->oxid, xri, ctxp->flag, ctxp->state);
- if (ctxp->flag & LPFC_NVMET_TNOTIFY) {
+ if (ctxp->flag & LPFC_NVME_TNOTIFY) {
/* Notify the transport */
nvmet_fc_rcv_fcp_abort(phba->targetport,
- &ctxp->ctx.fcp_req);
+ &ctxp->hdlrctx.fcp_req);
} else {
cancel_work_sync(&ctxp->ctxbuf->defer_work);
spin_lock_irqsave(&ctxp->ctxlock, iflag);
@@ -1802,7 +2008,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport,
static void
lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *nvmewqeq;
@@ -1853,7 +2059,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
struct lpfc_sli_ring *pring;
struct lpfc_iocbq *nvmewqeq;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
unsigned long iflags;
int rc;
@@ -1867,7 +2073,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq,
list);
spin_unlock_irqrestore(&pring->ring_lock, iflags);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2;
+ ctxp = (struct lpfc_async_xchg_ctx *)nvmewqeq->context2;
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq);
spin_lock_irqsave(&pring->ring_lock, iflags);
if (rc == -EBUSY) {
@@ -1879,7 +2085,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba,
if (rc == WQE_SUCCESS) {
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (ctxp->ts_cmd_nvme) {
- if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP)
+ if (ctxp->hdlrctx.fcp_req.op == NVMET_FCOP_RSP)
ctxp->ts_status_wqput = ktime_get_ns();
else
ctxp->ts_data_wqput = ktime_get_ns();
@@ -1926,114 +2132,61 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba)
}
/**
- * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer
+ * lpfc_nvmet_handle_lsreq - Process an NVME LS request
* @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
- * @nvmebuf: pointer to lpfc nvme command HBQ data structure.
+ * @axchg: pointer to exchange context for the NVME LS request
*
- * This routine is used for processing the WQE associated with a unsolicited
- * event. It first determines whether there is an existing ndlp that matches
- * the DID from the unsolicited WQE. If not, it will create a new one with
- * the DID from the unsolicited WQE. The ELS command from the unsolicited
- * WQE is then used to invoke the proper routine and to set up proper state
- * of the discovery state machine.
- **/
-static void
-lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
- struct hbq_dmabuf *nvmebuf)
+ * This routine is used for processing an asychronously received NVME LS
+ * request. Any remaining validation is done and the LS is then forwarded
+ * to the nvmet-fc transport via nvmet_fc_rcv_ls_req().
+ *
+ * The calling sequence should be: nvmet_fc_rcv_ls_req() -> (processing)
+ * -> lpfc_nvmet_xmt_ls_rsp/cmp -> req->done.
+ * lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg.
+ *
+ * Returns 0 if LS was handled and delivered to the transport
+ * Returns 1 if LS failed to be handled and should be dropped
+ */
+int
+lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *axchg)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_tgtport *tgtp;
- struct fc_frame_header *fc_hdr;
- struct lpfc_nvmet_rcv_ctx *ctxp;
- uint32_t *payload;
- uint32_t size, oxid, sid, rc;
-
-
- if (!nvmebuf || !phba->targetport) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6154 LS Drop IO\n");
- oxid = 0;
- size = 0;
- sid = 0;
- ctxp = NULL;
- goto dropit;
- }
-
- fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt);
- oxid = be16_to_cpu(fc_hdr->fh_ox_id);
-
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- payload = (uint32_t *)(nvmebuf->dbuf.virt);
- size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl);
- sid = sli4_sid_from_fc_hdr(fc_hdr);
+ struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private;
+ uint32_t *payload = axchg->payload;
+ int rc;
- ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC);
- if (ctxp == NULL) {
- atomic_inc(&tgtp->rcv_ls_req_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6155 LS Drop IO x%x: Alloc\n",
- oxid);
-dropit:
- lpfc_nvmeio_data(phba, "NVMET LS DROP: "
- "xri x%x sz %d from %06x\n",
- oxid, size, sid);
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- return;
- }
- ctxp->phba = phba;
- ctxp->size = size;
- ctxp->oxid = oxid;
- ctxp->sid = sid;
- ctxp->wqeq = NULL;
- ctxp->state = LPFC_NVMET_STE_LS_RCV;
- ctxp->entry_cnt = 1;
- ctxp->rqb_buffer = (void *)nvmebuf;
- ctxp->hdwq = &phba->sli4_hba.hdwq[0];
+ atomic_inc(&tgtp->rcv_ls_req_in);
- lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n",
- oxid, size, sid);
/*
- * The calling sequence should be:
- * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done
- * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp.
+ * Driver passes the ndlp as the hosthandle argument allowing
+ * the transport to generate LS requests for any associateions
+ * that are created.
*/
- atomic_inc(&tgtp->rcv_ls_req_in);
- rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req,
- payload, size);
+ rc = nvmet_fc_rcv_ls_req(phba->targetport, axchg->ndlp, &axchg->ls_rsp,
+ axchg->payload, axchg->size);
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC,
"6037 NVMET Unsol rcv: sz %d rc %d: %08x %08x %08x "
- "%08x %08x %08x\n", size, rc,
+ "%08x %08x %08x\n", axchg->size, rc,
*payload, *(payload+1), *(payload+2),
*(payload+3), *(payload+4), *(payload+5));
- if (rc == 0) {
+ if (!rc) {
atomic_inc(&tgtp->rcv_ls_req_out);
- return;
+ return 0;
}
- lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n",
- oxid, size, sid);
-
atomic_inc(&tgtp->rcv_ls_req_drop);
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n",
- ctxp->oxid, rc);
-
- /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
-
- atomic_inc(&tgtp->xmt_ls_abort);
- lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid);
#endif
+ return 1;
}
static void
lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
{
#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
- struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context;
+ struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context;
struct lpfc_hba *phba = ctxp->phba;
struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer;
struct lpfc_nvmet_tgtport *tgtp;
@@ -2054,7 +2207,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
return;
}
- if (ctxp->flag & LPFC_NVMET_ABTS_RCV) {
+ if (ctxp->flag & LPFC_NVME_ABTS_RCV) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6324 IO oxid x%x aborted\n",
ctxp->oxid);
@@ -2063,7 +2216,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
payload = (uint32_t *)(nvmebuf->dbuf.virt);
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- ctxp->flag |= LPFC_NVMET_TNOTIFY;
+ ctxp->flag |= LPFC_NVME_TNOTIFY;
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
if (ctxp->ts_isr_cmd)
ctxp->ts_cmd_nvme = ktime_get_ns();
@@ -2077,13 +2230,13 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
* A buffer has already been reposted for this IO, so just free
* the nvmebuf.
*/
- rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req,
+ rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->hdlrctx.fcp_req,
payload, ctxp->size);
/* Process FCP command */
if (rc == 0) {
atomic_inc(&tgtp->rcv_fcp_cmd_out);
spin_lock_irqsave(&ctxp->ctxlock, iflags);
- if ((ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) ||
+ if ((ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) ||
(nvmebuf != ctxp->rqb_buffer)) {
spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
return;
@@ -2102,7 +2255,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
atomic_inc(&tgtp->rcv_fcp_cmd_out);
atomic_inc(&tgtp->defer_fod);
spin_lock_irqsave(&ctxp->ctxlock, iflags);
- if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) {
+ if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) {
spin_unlock_irqrestore(&ctxp->ctxlock, iflags);
return;
}
@@ -2117,7 +2270,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf)
phba->sli4_hba.nvmet_mrq_data[qno], 1, qno);
return;
}
- ctxp->flag &= ~LPFC_NVMET_TNOTIFY;
+ ctxp->flag &= ~LPFC_NVME_TNOTIFY;
atomic_inc(&tgtp->rcv_fcp_cmd_drop);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n",
@@ -2224,7 +2377,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
uint64_t isr_timestamp,
uint8_t cqflag)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
struct fc_frame_header *fc_hdr;
struct lpfc_nvmet_ctxbuf *ctx_buf;
@@ -2306,11 +2459,11 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
sid = sli4_sid_from_fc_hdr(fc_hdr);
- ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context;
+ ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context;
spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag);
list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list);
spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag);
- if (ctxp->state != LPFC_NVMET_STE_FREE) {
+ if (ctxp->state != LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6414 NVMET Context corrupt %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
@@ -2322,7 +2475,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
ctxp->oxid = oxid;
ctxp->sid = sid;
ctxp->idx = idx;
- ctxp->state = LPFC_NVMET_STE_RCV;
+ ctxp->state = LPFC_NVME_STE_RCV;
ctxp->entry_cnt = 1;
ctxp->flag = 0;
ctxp->ctxbuf = ctx_buf;
@@ -2369,40 +2522,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba,
}
/**
- * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport
- * @phba: pointer to lpfc hba data structure.
- * @pring: pointer to a SLI ring.
- * @nvmebuf: pointer to received nvme data structure.
- *
- * This routine is used to process an unsolicited event received from a SLI
- * (Service Level Interface) ring. The actual processing of the data buffer
- * associated with the unsolicited event is done by invoking the routine
- * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the
- * SLI RQ on which the unsolicited event was received.
- **/
-void
-lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
- struct lpfc_iocbq *piocb)
-{
- struct lpfc_dmabuf *d_buf;
- struct hbq_dmabuf *nvmebuf;
-
- d_buf = piocb->context2;
- nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
-
- if (!nvmebuf) {
- lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
- "3015 LS Drop IO\n");
- return;
- }
- if (phba->nvmet_support == 0) {
- lpfc_in_buf_free(phba, &nvmebuf->dbuf);
- return;
- }
- lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf);
-}
-
-/**
* lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport
* @phba: pointer to lpfc hba data structure.
* @idx: relative index of MRQ vector
@@ -2462,7 +2581,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba,
**/
static struct lpfc_iocbq *
lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
dma_addr_t rspbuf, uint16_t rspsize)
{
struct lpfc_nodelist *ndlp;
@@ -2584,9 +2703,9 @@ nvme_wqe_free_wqeq_exit:
static struct lpfc_iocbq *
lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp)
+ struct lpfc_async_xchg_ctx *ctxp)
{
- struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req;
+ struct nvmefc_tgt_fcp_req *rsp = &ctxp->hdlrctx.fcp_req;
struct lpfc_nvmet_tgtport *tgtp;
struct sli4_sge *sgl;
struct lpfc_nodelist *ndlp;
@@ -2647,9 +2766,9 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
}
/* Sanity check */
- if (((ctxp->state == LPFC_NVMET_STE_RCV) &&
+ if (((ctxp->state == LPFC_NVME_STE_RCV) &&
(ctxp->entry_cnt == 1)) ||
- (ctxp->state == LPFC_NVMET_STE_DATA)) {
+ (ctxp->state == LPFC_NVME_STE_DATA)) {
wqe = &nvmewqe->wqe;
} else {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
@@ -2912,7 +3031,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba,
sgl++;
ctxp->offset += cnt;
}
- ctxp->state = LPFC_NVMET_STE_DATA;
+ ctxp->state = LPFC_NVME_STE_DATA;
ctxp->entry_cnt++;
return nvmewqe;
}
@@ -2931,7 +3050,7 @@ static void
lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
uint32_t result;
unsigned long flags;
@@ -2941,23 +3060,23 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
result = wcqe->parameter;
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+ if (ctxp->flag & LPFC_NVME_ABORT_OP)
atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->state = LPFC_NVMET_STE_DONE;
+ ctxp->state = LPFC_NVME_STE_DONE;
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
- !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+ if ((ctxp->flag & LPFC_NVME_CTX_RLS) &&
+ !(ctxp->flag & LPFC_NVME_XBUSY)) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp);
@@ -2981,7 +3100,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_sli_release_iocbq(phba, cmdwqe);
/* Since iaab/iaar are NOT set, there is no work left.
- * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+ * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted
* should have been called already.
*/
}
@@ -3000,7 +3119,7 @@ static void
lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
unsigned long flags;
uint32_t result;
@@ -3020,11 +3139,11 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_ABORT_OP)
+ if (ctxp->flag & LPFC_NVME_ABORT_OP)
atomic_inc(&tgtp->xmt_fcp_abort_cmpl);
/* Sanity check */
- if (ctxp->state != LPFC_NVMET_STE_ABORT) {
+ if (ctxp->state != LPFC_NVME_STE_ABORT) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6112 ABTS Wrong state:%d oxid x%x\n",
ctxp->state, ctxp->oxid);
@@ -3033,15 +3152,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
/* Check if we already received a free context call
* and we have completed processing an abort situation.
*/
- ctxp->state = LPFC_NVMET_STE_DONE;
- if ((ctxp->flag & LPFC_NVMET_CTX_RLS) &&
- !(ctxp->flag & LPFC_NVMET_XBUSY)) {
+ ctxp->state = LPFC_NVME_STE_DONE;
+ if ((ctxp->flag & LPFC_NVME_CTX_RLS) &&
+ !(ctxp->flag & LPFC_NVME_XBUSY)) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp);
@@ -3062,7 +3181,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf);
/* Since iaab/iaar are NOT set, there is no work left.
- * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted
+ * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted
* should have been called already.
*/
}
@@ -3081,15 +3200,17 @@ static void
lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
struct lpfc_wcqe_complete *wcqe)
{
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_nvmet_tgtport *tgtp;
uint32_t result;
ctxp = cmdwqe->context2;
result = wcqe->parameter;
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
- atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ if (phba->nvmet_support) {
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_inc(&tgtp->xmt_ls_abort_cmpl);
+ }
lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS,
"6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n",
@@ -3107,7 +3228,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
return;
}
- if (ctxp->state != LPFC_NVMET_STE_LS_ABORT) {
+ if (ctxp->state != LPFC_NVME_STE_LS_ABORT) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6416 NVMET LS abort cmpl state mismatch: "
"oxid x%x: %d %d\n",
@@ -3122,10 +3243,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe,
static int
lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
union lpfc_wqe128 *wqe_abts;
struct lpfc_nodelist *ndlp;
@@ -3134,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
"6067 ABTS: sid %x xri x%x/x%x\n",
sid, xri, ctxp->wqeq->sli4_xritag);
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
ndlp = lpfc_findnode_did(phba->pport, sid);
if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
(ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6134 Drop ABTS - wrong NDLP state x%x.\n",
(ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE);
@@ -3217,7 +3340,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba,
static int
lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
@@ -3244,7 +3367,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
/* No failure to an ABTS request. */
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3258,13 +3381,13 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
"6161 ABORT failed: No wqeqs: "
"xri: x%x\n", ctxp->oxid);
/* No failure to an ABTS request. */
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
abts_wqeq = ctxp->abort_wqeq;
- ctxp->state = LPFC_NVMET_STE_ABORT;
- opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0;
+ ctxp->state = LPFC_NVME_STE_ABORT;
+ opt = (ctxp->flag & LPFC_NVME_ABTS_RCV) ? INHIBIT_ABORT : 0;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
/* Announce entry to new IO submit field. */
@@ -3287,7 +3410,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
phba->hba_flag, ctxp->oxid);
lpfc_sli_release_iocbq(phba, abts_wqeq);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3302,7 +3425,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
ctxp->oxid);
lpfc_sli_release_iocbq(phba, abts_wqeq);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
return 0;
}
@@ -3331,7 +3454,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
atomic_inc(&tgtp->xmt_abort_rsp_error);
spin_lock_irqsave(&ctxp->ctxlock, flags);
- ctxp->flag &= ~LPFC_NVMET_ABORT_OP;
+ ctxp->flag &= ~LPFC_NVME_ABORT_OP;
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
lpfc_sli_release_iocbq(phba, abts_wqeq);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
@@ -3343,7 +3466,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba,
static int
lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
struct lpfc_nvmet_tgtport *tgtp;
@@ -3358,14 +3481,14 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
ctxp->wqeq->hba_wqidx = 0;
}
- if (ctxp->state == LPFC_NVMET_STE_FREE) {
+ if (ctxp->state == LPFC_NVME_STE_FREE) {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6417 NVMET ABORT ctx freed %d %d oxid x%x\n",
ctxp->state, ctxp->entry_cnt, ctxp->oxid);
rc = WQE_BUSY;
goto aerr;
}
- ctxp->state = LPFC_NVMET_STE_ABORT;
+ ctxp->state = LPFC_NVME_STE_ABORT;
ctxp->entry_cnt++;
rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri);
if (rc == 0)
@@ -3387,13 +3510,13 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba,
aerr:
spin_lock_irqsave(&ctxp->ctxlock, flags);
- if (ctxp->flag & LPFC_NVMET_CTX_RLS) {
+ if (ctxp->flag & LPFC_NVME_CTX_RLS) {
spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
list_del_init(&ctxp->list);
spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock);
released = true;
}
- ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS);
+ ctxp->flag &= ~(LPFC_NVME_ABORT_OP | LPFC_NVME_CTX_RLS);
spin_unlock_irqrestore(&ctxp->ctxlock, flags);
atomic_inc(&tgtp->xmt_abort_rsp_error);
@@ -3406,29 +3529,39 @@ aerr:
return 1;
}
-static int
-lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
- struct lpfc_nvmet_rcv_ctx *ctxp,
+/**
+ * lpfc_nvme_unsol_ls_issue_abort - issue ABTS on an exchange received
+ * via async frame receive where the frame is not handled.
+ * @phba: pointer to adapter structure
+ * @ctxp: pointer to the asynchronously received received sequence
+ * @sid: address of the remote port to send the ABTS to
+ * @xri: oxid value to for the ABTS (other side's exchange id).
+ **/
+int
+lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba,
+ struct lpfc_async_xchg_ctx *ctxp,
uint32_t sid, uint16_t xri)
{
- struct lpfc_nvmet_tgtport *tgtp;
+ struct lpfc_nvmet_tgtport *tgtp = NULL;
struct lpfc_iocbq *abts_wqeq;
unsigned long flags;
int rc;
- if ((ctxp->state == LPFC_NVMET_STE_LS_RCV && ctxp->entry_cnt == 1) ||
- (ctxp->state == LPFC_NVMET_STE_LS_RSP && ctxp->entry_cnt == 2)) {
- ctxp->state = LPFC_NVMET_STE_LS_ABORT;
+ if ((ctxp->state == LPFC_NVME_STE_LS_RCV && ctxp->entry_cnt == 1) ||
+ (ctxp->state == LPFC_NVME_STE_LS_RSP && ctxp->entry_cnt == 2)) {
+ ctxp->state = LPFC_NVME_STE_LS_ABORT;
ctxp->entry_cnt++;
} else {
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR,
"6418 NVMET LS abort state mismatch "
"IO x%x: %d %d\n",
ctxp->oxid, ctxp->state, ctxp->entry_cnt);
- ctxp->state = LPFC_NVMET_STE_LS_ABORT;
+ ctxp->state = LPFC_NVME_STE_LS_ABORT;
}
- tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ if (phba->nvmet_support && phba->targetport)
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+
if (!ctxp->wqeq) {
/* Issue ABTS for this WQE based on iotag */
ctxp->wqeq = lpfc_sli_get_iocbq(phba);
@@ -3455,16 +3588,44 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba,
rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq);
spin_unlock_irqrestore(&phba->hbalock, flags);
if (rc == WQE_SUCCESS) {
- atomic_inc(&tgtp->xmt_abort_unsol);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_unsol);
return 0;
}
out:
- atomic_inc(&tgtp->xmt_abort_rsp_error);
+ if (tgtp)
+ atomic_inc(&tgtp->xmt_abort_rsp_error);
abts_wqeq->context2 = NULL;
abts_wqeq->context3 = NULL;
lpfc_sli_release_iocbq(phba, abts_wqeq);
- kfree(ctxp);
lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS,
"6056 Failed to Issue ABTS. Status x%x\n", rc);
- return 0;
+ return 1;
+}
+
+/**
+ * lpfc_nvmet_invalidate_host
+ *
+ * @phba - pointer to the driver instance bound to an adapter port.
+ * @ndlp - pointer to an lpfc_nodelist type
+ *
+ * This routine upcalls the nvmet transport to invalidate an NVME
+ * host to which this target instance had active connections.
+ */
+void
+lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
+{
+ struct lpfc_nvmet_tgtport *tgtp;
+
+ lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS,
+ "6203 Invalidating hosthandle x%px\n",
+ ndlp);
+
+ tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private;
+ atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE);
+
+#if (IS_ENABLED(CONFIG_NVME_TARGET_FC))
+ /* Need to get the nvmet_fc_target_port pointer here.*/
+ nvmet_fc_invalidate_host(phba->targetport, ndlp);
+#endif
}
diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h
deleted file mode 100644
index b80b1639b9a7..000000000000
--- a/drivers/scsi/lpfc/lpfc_nvmet.h
+++ /dev/null
@@ -1,158 +0,0 @@
-/*******************************************************************
- * This file is part of the Emulex Linux Device Driver for *
- * Fibre Channel Host Bus Adapters. *
- * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term *
- * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. *
- * Copyright (C) 2004-2016 Emulex. All rights reserved. *
- * EMULEX and SLI are trademarks of Emulex. *
- * www.broadcom.com *
- * Portions Copyright (C) 2004-2005 Christoph Hellwig *
- * *
- * This program is free software; you can redistribute it and/or *
- * modify it under the terms of version 2 of the GNU General *
- * Public License as published by the Free Software Foundation. *
- * This program is distributed in the hope that it will be useful. *
- * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND *
- * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, *
- * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE *
- * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD *
- * TO BE LEGALLY INVALID. See the GNU General Public License for *
- * more details, a copy of which can be found in the file COPYING *
- * included with this package. *
- ********************************************************************/
-
-#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */
-#define LPFC_NVMET_RQE_MIN_POST 128
-#define LPFC_NVMET_RQE_DEF_POST 512
-#define LPFC_NVMET_RQE_DEF_COUNT 2048
-#define LPFC_NVMET_SUCCESS_LEN 12
-
-#define LPFC_NVMET_MRQ_AUTO 0
-#define LPFC_NVMET_MRQ_MAX 16
-
-#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC)
-
-/* Used for NVME Target */
-struct lpfc_nvmet_tgtport {
- struct lpfc_hba *phba;
- struct completion *tport_unreg_cmp;
-
- /* Stats counters - lpfc_nvmet_unsol_ls_buffer */
- atomic_t rcv_ls_req_in;
- atomic_t rcv_ls_req_out;
- atomic_t rcv_ls_req_drop;
- atomic_t xmt_ls_abort;
- atomic_t xmt_ls_abort_cmpl;
-
- /* Stats counters - lpfc_nvmet_xmt_ls_rsp */
- atomic_t xmt_ls_rsp;
- atomic_t xmt_ls_drop;
-
- /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */
- atomic_t xmt_ls_rsp_error;
- atomic_t xmt_ls_rsp_aborted;
- atomic_t xmt_ls_rsp_xb_set;
- atomic_t xmt_ls_rsp_cmpl;
-
- /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */
- atomic_t rcv_fcp_cmd_in;
- atomic_t rcv_fcp_cmd_out;
- atomic_t rcv_fcp_cmd_drop;
- atomic_t rcv_fcp_cmd_defer;
- atomic_t xmt_fcp_release;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_op */
- atomic_t xmt_fcp_drop;
- atomic_t xmt_fcp_read_rsp;
- atomic_t xmt_fcp_read;
- atomic_t xmt_fcp_write;
- atomic_t xmt_fcp_rsp;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */
- atomic_t xmt_fcp_rsp_xb_set;
- atomic_t xmt_fcp_rsp_cmpl;
- atomic_t xmt_fcp_rsp_error;
- atomic_t xmt_fcp_rsp_aborted;
- atomic_t xmt_fcp_rsp_drop;
-
- /* Stats counters - lpfc_nvmet_xmt_fcp_abort */
- atomic_t xmt_fcp_xri_abort_cqe;
- atomic_t xmt_fcp_abort;
- atomic_t xmt_fcp_abort_cmpl;
- atomic_t xmt_abort_sol;
- atomic_t xmt_abort_unsol;
- atomic_t xmt_abort_rsp;
- atomic_t xmt_abort_rsp_error;
-
- /* Stats counters - defer IO */
- atomic_t defer_ctx;
- atomic_t defer_fod;
- atomic_t defer_wqfull;
-};
-
-struct lpfc_nvmet_ctx_info {
- struct list_head nvmet_ctx_list;
- spinlock_t nvmet_ctx_list_lock; /* lock per CPU */
- struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu;
- struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu;
- uint16_t nvmet_ctx_list_cnt;
- char pad[16]; /* pad to a cache-line */
-};
-
-/* This retrieves the context info associated with the specified cpu / mrq */
-#define lpfc_get_ctx_list(phba, cpu, mrq) \
- (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq))
-
-struct lpfc_nvmet_rcv_ctx {
- union {
- struct nvmefc_tgt_ls_req ls_req;
- struct nvmefc_tgt_fcp_req fcp_req;
- } ctx;
- struct list_head list;
- struct lpfc_hba *phba;
- struct lpfc_iocbq *wqeq;
- struct lpfc_iocbq *abort_wqeq;
- spinlock_t ctxlock; /* protect flag access */
- uint32_t sid;
- uint32_t offset;
- uint16_t oxid;
- uint16_t size;
- uint16_t entry_cnt;
- uint16_t cpu;
- uint16_t idx;
- uint16_t state;
- /* States */
-#define LPFC_NVMET_STE_LS_RCV 1
-#define LPFC_NVMET_STE_LS_ABORT 2
-#define LPFC_NVMET_STE_LS_RSP 3
-#define LPFC_NVMET_STE_RCV 4
-#define LPFC_NVMET_STE_DATA 5
-#define LPFC_NVMET_STE_ABORT 6
-#define LPFC_NVMET_STE_DONE 7
-#define LPFC_NVMET_STE_FREE 0xff
- uint16_t flag;
-#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */
-#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */
-#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */
-#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */
-#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */
-#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */
-#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */
-#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */
- struct rqb_dmabuf *rqb_buffer;
- struct lpfc_nvmet_ctxbuf *ctxbuf;
- struct lpfc_sli4_hdw_queue *hdwq;
-
-#ifdef CONFIG_SCSI_LPFC_DEBUG_FS
- uint64_t ts_isr_cmd;
- uint64_t ts_cmd_nvme;
- uint64_t ts_nvme_data;
- uint64_t ts_data_wqput;
- uint64_t ts_isr_data;
- uint64_t ts_data_nvme;
- uint64_t ts_nvme_status;
- uint64_t ts_status_wqput;
- uint64_t ts_isr_status;
- uint64_t ts_status_nvme;
-#endif
-};
diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c
index b6fb665e6ec4..9e21c4f3b009 100644
--- a/drivers/scsi/lpfc/lpfc_sli.c
+++ b/drivers/scsi/lpfc/lpfc_sli.c
@@ -39,8 +39,6 @@
#include <asm/set_memory.h>
#endif
-#include <linux/nvme-fc-driver.h>
-
#include "lpfc_hw4.h"
#include "lpfc_hw.h"
#include "lpfc_sli.h"
@@ -50,7 +48,6 @@
#include "lpfc.h"
#include "lpfc_scsi.h"
#include "lpfc_nvme.h"
-#include "lpfc_nvmet.h"
#include "lpfc_crtn.h"
#include "lpfc_logmsg.h"
#include "lpfc_compat.h"
@@ -2796,6 +2793,123 @@ lpfc_sli_get_buff(struct lpfc_hba *phba,
}
/**
+ * lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer
+ * containing a NVME LS request.
+ * @phba: pointer to lpfc hba data structure.
+ * @piocb: pointer to the iocbq struct representing the sequence starting
+ * frame.
+ *
+ * This routine initially validates the NVME LS, validates there is a login
+ * with the port that sent the LS, and then calls the appropriate nvme host
+ * or target LS request handler.
+ **/
+static void
+lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb)
+{
+ struct lpfc_nodelist *ndlp;
+ struct lpfc_dmabuf *d_buf;
+ struct hbq_dmabuf *nvmebuf;
+ struct fc_frame_header *fc_hdr;
+ struct lpfc_async_xchg_ctx *axchg = NULL;
+ char *failwhy = NULL;
+ uint32_t oxid, sid, did, fctl, size;
+ int ret = 1;
+
+ d_buf = piocb->context2;
+
+ nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf);
+ fc_hdr = nvmebuf->hbuf.virt;
+ oxid = be16_to_cpu(fc_hdr->fh_ox_id);
+ sid = sli4_sid_from_fc_hdr(fc_hdr);
+ did = sli4_did_from_fc_hdr(fc_hdr);
+ fctl = (fc_hdr->fh_f_ctl[0] << 16 |
+ fc_hdr->fh_f_ctl[1] << 8 |
+ fc_hdr->fh_f_ctl[2]);
+ size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl);
+
+ lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n",
+ oxid, size, sid);
+
+ if (phba->pport->load_flag & FC_UNLOADING) {
+ failwhy = "Driver Unloading";
+ } else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) {
+ failwhy = "NVME FC4 Disabled";
+ } else if (!phba->nvmet_support && !phba->pport->localport) {
+ failwhy = "No Localport";
+ } else if (phba->nvmet_support && !phba->targetport) {
+ failwhy = "No Targetport";
+ } else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) {
+ failwhy = "Bad NVME LS R_CTL";
+ } else if (unlikely((fctl & 0x00FF0000) !=
+ (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) {
+ failwhy = "Bad NVME LS F_CTL";
+ } else {
+ axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC);
+ if (!axchg)
+ failwhy = "No CTX memory";
+ }
+
+ if (unlikely(failwhy)) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6154 Drop NVME LS: SID %06X OXID x%X: %s\n",
+ sid, oxid, failwhy);
+ goto out_fail;
+ }
+
+ /* validate the source of the LS is logged in */
+ ndlp = lpfc_findnode_did(phba->pport, sid);
+ if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) ||
+ ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) &&
+ (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) {
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC,
+ "6216 NVME Unsol rcv: No ndlp: "
+ "NPort_ID x%x oxid x%x\n",
+ sid, oxid);
+ goto out_fail;
+ }
+
+ axchg->phba = phba;
+ axchg->ndlp = ndlp;
+ axchg->size = size;
+ axchg->oxid = oxid;
+ axchg->sid = sid;
+ axchg->wqeq = NULL;
+ axchg->state = LPFC_NVME_STE_LS_RCV;
+ axchg->entry_cnt = 1;
+ axchg->rqb_buffer = (void *)nvmebuf;
+ axchg->hdwq = &phba->sli4_hba.hdwq[0];
+ axchg->payload = nvmebuf->dbuf.virt;
+ INIT_LIST_HEAD(&axchg->list);
+
+ if (phba->nvmet_support)
+ ret = lpfc_nvmet_handle_lsreq(phba, axchg);
+ else
+ ret = lpfc_nvme_handle_lsreq(phba, axchg);
+
+ /* if zero, LS was successfully handled. If non-zero, LS not handled */
+ if (!ret)
+ return;
+
+ lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR,
+ "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X "
+ "NVMe%s handler failed %d\n",
+ did, sid, oxid,
+ (phba->nvmet_support) ? "T" : "I", ret);
+
+out_fail:
+
+ /* recycle receive buffer */
+ lpfc_in_buf_free(phba, &nvmebuf->dbuf);
+
+ /* If start of new exchange, abort it */
+ if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX)))
+ ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid);
+
+ if (ret)
+ kfree(axchg);
+}
+
+/**
* lpfc_complete_unsol_iocb - Complete an unsolicited sequence
* @phba: Pointer to HBA context object.
* @pring: Pointer to driver SLI ring object.
@@ -2816,7 +2930,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring,
switch (fch_type) {
case FC_TYPE_NVME:
- lpfc_nvmet_unsol_ls_event(phba, pring, saveq);
+ lpfc_nvme_unsol_ls_handler(phba, saveq);
return 1;
default:
break;
@@ -13981,8 +14095,8 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq,
/* Just some basic sanity checks on FCP Command frame */
fctl = (fc_hdr->fh_f_ctl[0] << 16 |
- fc_hdr->fh_f_ctl[1] << 8 |
- fc_hdr->fh_f_ctl[2]);
+ fc_hdr->fh_f_ctl[1] << 8 |
+ fc_hdr->fh_f_ctl[2]);
if (((fctl &
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) !=
(FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) ||
@@ -19891,7 +20005,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp,
struct lpfc_iocbq *pwqe)
{
union lpfc_wqe128 *wqe = &pwqe->wqe;
- struct lpfc_nvmet_rcv_ctx *ctxp;
+ struct lpfc_async_xchg_ctx *ctxp;
struct lpfc_queue *wq;
struct lpfc_sglq *sglq;
struct lpfc_sli_ring *pring;