diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:37:03 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 15:37:03 -0700 |
commit | bce159d734091fe31340976081577333f52a85e4 (patch) | |
tree | 8396be51e6703797a60aefb4992e729f327d27c2 /drivers | |
parent | 750a02ab8d3c49ca7d23102be90d3d1db19e2827 (diff) | |
parent | 0c8d3fceade2ab1bbac68bca013e62bfdb851d19 (diff) |
Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block
Pull block driver updates from Jens Axboe:
"On top of the core changes, here are the block driver changes for this
merge window:
- NVMe changes:
- NVMe over Fibre Channel protocol updates, which also reach
over to drivers/scsi/lpfc (James Smart)
- namespace revalidation support on the target (Anthony
Iliopoulos)
- gcc zero length array fix (Arnd Bergmann)
- nvmet cleanups (Chaitanya Kulkarni)
- misc cleanups and fixes (me, Keith Busch, Sagi Grimberg)
- use a SRQ per completion vector (Max Gurtovoy)
- fix handling of runtime changes to the queue count (Weiping
Zhang)
- t10 protection information support for nvme-rdma and
nvmet-rdma (Israel Rukshin and Max Gurtovoy)
- target side AEN improvements (Chaitanya Kulkarni)
- various fixes and minor improvements all over, icluding the
nvme part of the lpfc driver"
- Floppy code cleanup series (Willy, Denis)
- Floppy contention fix (Jiri)
- Loop CONFIGURE support (Martijn)
- bcache fixes/improvements (Coly, Joe, Colin)
- q->queuedata cleanups (Christoph)
- Get rid of ioctl_by_bdev (Christoph, Stefan)
- md/raid5 allocation fixes (Coly)
- zero length array fixes (Gustavo)
- swim3 task state fix (Xu)"
* tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits)
bcache: configure the asynchronous registertion to be experimental
bcache: asynchronous devices registration
bcache: fix refcount underflow in bcache_device_free()
bcache: Convert pr_<level> uses to a more typical style
bcache: remove redundant variables i and n
lpfc: Fix return value in __lpfc_nvme_ls_abort
lpfc: fix axchg pointer reference after free and double frees
lpfc: Fix pointer checks and comments in LS receive refactoring
nvme: set dma alignment to qword
nvmet: cleanups the loop in nvmet_async_events_process
nvmet: fix memory leak when removing namespaces and controllers concurrently
nvmet-rdma: add metadata/T10-PI support
nvmet: add metadata support for block devices
nvmet: add metadata/T10-PI support
nvme: add Metadata Capabilities enumerations
nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
nvmet: rename nvmet_rw_len to nvmet_rw_data_len
nvmet: add metadata characteristics for a namespace
nvme-rdma: add metadata/T10-PI support
nvme-rdma: introduce nvme_rdma_sgl structure
...
Diffstat (limited to 'drivers')
59 files changed, 4931 insertions, 2111 deletions
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c index c3daa64cb52c..3e9db22db2a8 100644 --- a/drivers/block/floppy.c +++ b/drivers/block/floppy.c @@ -337,8 +337,7 @@ static bool initialized; /* * globals used by 'result()' */ -#define MAX_REPLIES 16 -static unsigned char reply_buffer[MAX_REPLIES]; +static unsigned char reply_buffer[FD_RAW_REPLY_SIZE]; static int inr; /* size of reply buffer, when called from interrupt */ #define ST0 0 #define ST1 1 @@ -595,12 +594,12 @@ static unsigned char in_sector_offset; /* offset within physical sector, static inline unsigned char fdc_inb(int fdc, int reg) { - return fd_inb(fdc_state[fdc].address + reg); + return fd_inb(fdc_state[fdc].address, reg); } static inline void fdc_outb(unsigned char value, int fdc, int reg) { - fd_outb(value, fdc_state[fdc].address + reg); + fd_outb(value, fdc_state[fdc].address, reg); } static inline bool drive_no_geom(int drive) @@ -668,16 +667,12 @@ static struct output_log { static int output_log_pos; -#define current_reqD -1 #define MAXTIMEOUT -2 static void __reschedule_timeout(int drive, const char *message) { unsigned long delay; - if (drive == current_reqD) - drive = current_drive; - if (drive < 0 || drive >= N_DRIVE) { delay = 20UL * HZ; drive = 0; @@ -827,59 +822,70 @@ static int set_dor(int fdc, char mask, char data) return olddor; } -static void twaddle(void) +static void twaddle(int fdc, int drive) { - if (drive_params[current_drive].select_delay) + if (drive_params[drive].select_delay) return; - fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)), - current_fdc, FD_DOR); - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); - drive_state[current_drive].select_date = jiffies; + fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(drive)), + fdc, FD_DOR); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); + drive_state[drive].select_date = jiffies; } /* - * Reset all driver information about the current fdc. + * Reset all driver information about the specified fdc. * This is needed after a reset, and after a raw command. */ -static void reset_fdc_info(int mode) +static void reset_fdc_info(int fdc, int mode) { int drive; - fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1; - fdc_state[current_fdc].need_configure = 1; - fdc_state[current_fdc].perp_mode = 1; - fdc_state[current_fdc].rawcmd = 0; + fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1; + fdc_state[fdc].need_configure = 1; + fdc_state[fdc].perp_mode = 1; + fdc_state[fdc].rawcmd = 0; for (drive = 0; drive < N_DRIVE; drive++) - if (FDC(drive) == current_fdc && + if (FDC(drive) == fdc && (mode || drive_state[drive].track != NEED_1_RECAL)) drive_state[drive].track = NEED_2_RECAL; } -/* selects the fdc and drive, and enables the fdc's input/dma. */ +/* + * selects the fdc and drive, and enables the fdc's input/dma. + * Both current_drive and current_fdc are changed to match the new drive. + */ static void set_fdc(int drive) { - unsigned int new_fdc = current_fdc; + unsigned int fdc; - if (drive >= 0 && drive < N_DRIVE) { - new_fdc = FDC(drive); - current_drive = drive; + if (drive < 0 || drive >= N_DRIVE) { + pr_info("bad drive value %d\n", drive); + return; } - if (new_fdc >= N_FDC) { + + fdc = FDC(drive); + if (fdc >= N_FDC) { pr_info("bad fdc value\n"); return; } - current_fdc = new_fdc; - set_dor(current_fdc, ~0, 8); + + set_dor(fdc, ~0, 8); #if N_FDC > 1 - set_dor(1 - current_fdc, ~8, 0); + set_dor(1 - fdc, ~8, 0); #endif - if (fdc_state[current_fdc].rawcmd == 2) - reset_fdc_info(1); - if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) - fdc_state[current_fdc].reset = 1; + if (fdc_state[fdc].rawcmd == 2) + reset_fdc_info(fdc, 1); + if (fdc_inb(fdc, FD_STATUS) != STATUS_READY) + fdc_state[fdc].reset = 1; + + current_drive = drive; + current_fdc = fdc; } -/* locks the driver */ +/* + * locks the driver. + * Both current_drive and current_fdc are changed to match the new drive. + */ static int lock_fdc(int drive) { if (WARN(atomic_read(&usage_count) == 0, @@ -1062,12 +1068,9 @@ static void setup_DMA(void) unsigned long f; if (raw_cmd->length == 0) { - int i; - - pr_info("zero dma transfer size:"); - for (i = 0; i < raw_cmd->cmd_count; i++) - pr_cont("%x,", raw_cmd->cmd[i]); - pr_cont("\n"); + print_hex_dump(KERN_INFO, "zero dma transfer size: ", + DUMP_PREFIX_NONE, 16, 1, + raw_cmd->fullcmd, raw_cmd->cmd_count, false); cont->done(0); fdc_state[current_fdc].reset = 1; return; @@ -1104,62 +1107,62 @@ static void setup_DMA(void) #endif } -static void show_floppy(void); +static void show_floppy(int fdc); /* waits until the fdc becomes ready */ -static int wait_til_ready(void) +static int wait_til_ready(int fdc) { int status; int counter; - if (fdc_state[current_fdc].reset) + if (fdc_state[fdc].reset) return -1; for (counter = 0; counter < 10000; counter++) { - status = fdc_inb(current_fdc, FD_STATUS); + status = fdc_inb(fdc, FD_STATUS); if (status & STATUS_READY) return status; } if (initialized) { - DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); - show_floppy(); + DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); + show_floppy(fdc); } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; return -1; } /* sends a command byte to the fdc */ -static int output_byte(char byte) +static int output_byte(int fdc, char byte) { - int status = wait_til_ready(); + int status = wait_til_ready(fdc); if (status < 0) return -1; if (is_ready_state(status)) { - fdc_outb(byte, current_fdc, FD_DATA); + fdc_outb(byte, fdc, FD_DATA); output_log[output_log_pos].data = byte; output_log[output_log_pos].status = status; output_log[output_log_pos].jiffies = jiffies; output_log_pos = (output_log_pos + 1) % OLOGSIZE; return 0; } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; if (initialized) { DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n", - byte, current_fdc, status); - show_floppy(); + byte, fdc, status); + show_floppy(fdc); } return -1; } /* gets the response from the fdc */ -static int result(void) +static int result(int fdc) { int i; int status = 0; - for (i = 0; i < MAX_REPLIES; i++) { - status = wait_til_ready(); + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) { + status = wait_til_ready(fdc); if (status < 0) break; status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA; @@ -1169,24 +1172,24 @@ static int result(void) return i; } if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) - reply_buffer[i] = fdc_inb(current_fdc, FD_DATA); + reply_buffer[i] = fdc_inb(fdc, FD_DATA); else break; } if (initialized) { DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n", - current_fdc, status, i); - show_floppy(); + fdc, status, i); + show_floppy(fdc); } - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; return -1; } #define MORE_OUTPUT -2 /* does the fdc need more output? */ -static int need_more_output(void) +static int need_more_output(int fdc) { - int status = wait_til_ready(); + int status = wait_til_ready(fdc); if (status < 0) return -1; @@ -1194,13 +1197,13 @@ static int need_more_output(void) if (is_ready_state(status)) return MORE_OUTPUT; - return result(); + return result(fdc); } /* Set perpendicular mode as required, based on data rate, if supported. * 82077 Now tested. 1Mbps data rate only possible with 82077-1. */ -static void perpendicular_mode(void) +static void perpendicular_mode(int fdc) { unsigned char perp_mode; @@ -1215,7 +1218,7 @@ static void perpendicular_mode(void) default: DPRINT("Invalid data rate for perpendicular mode!\n"); cont->done(0); - fdc_state[current_fdc].reset = 1; + fdc_state[fdc].reset = 1; /* * convenient way to return to * redo without too much hassle @@ -1226,12 +1229,12 @@ static void perpendicular_mode(void) } else perp_mode = 0; - if (fdc_state[current_fdc].perp_mode == perp_mode) + if (fdc_state[fdc].perp_mode == perp_mode) return; - if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { - output_byte(FD_PERPENDICULAR); - output_byte(perp_mode); - fdc_state[current_fdc].perp_mode = perp_mode; + if (fdc_state[fdc].version >= FDC_82077_ORIG) { + output_byte(fdc, FD_PERPENDICULAR); + output_byte(fdc, perp_mode); + fdc_state[fdc].perp_mode = perp_mode; } else if (perp_mode) { DPRINT("perpendicular mode not supported by this FDC.\n"); } @@ -1240,16 +1243,15 @@ static void perpendicular_mode(void) static int fifo_depth = 0xa; static int no_fifo; -static int fdc_configure(void) +static int fdc_configure(int fdc) { /* Turn on FIFO */ - output_byte(FD_CONFIGURE); - if (need_more_output() != MORE_OUTPUT) + output_byte(fdc, FD_CONFIGURE); + if (need_more_output(fdc) != MORE_OUTPUT) return 0; - output_byte(0); - output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); - output_byte(0); /* pre-compensation from track - 0 upwards */ + output_byte(fdc, 0); + output_byte(fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); + output_byte(fdc, 0); /* pre-compensation from track 0 upwards */ return 1; } @@ -1274,7 +1276,7 @@ static int fdc_configure(void) * * These values are rounded up to the next highest available delay time. */ -static void fdc_specify(void) +static void fdc_specify(int fdc, int drive) { unsigned char spec1; unsigned char spec2; @@ -1286,10 +1288,10 @@ static void fdc_specify(void) int hlt_max_code = 0x7f; int hut_max_code = 0xf; - if (fdc_state[current_fdc].need_configure && - fdc_state[current_fdc].version >= FDC_82072A) { - fdc_configure(); - fdc_state[current_fdc].need_configure = 0; + if (fdc_state[fdc].need_configure && + fdc_state[fdc].version >= FDC_82072A) { + fdc_configure(fdc); + fdc_state[fdc].need_configure = 0; } switch (raw_cmd->rate & 0x03) { @@ -1298,13 +1300,13 @@ static void fdc_specify(void) break; case 1: dtr = 300; - if (fdc_state[current_fdc].version >= FDC_82078) { + if (fdc_state[fdc].version >= FDC_82078) { /* chose the default rate table, not the one * where 1 = 2 Mbps */ - output_byte(FD_DRIVESPEC); - if (need_more_output() == MORE_OUTPUT) { - output_byte(UNIT(current_drive)); - output_byte(0xc0); + output_byte(fdc, FD_DRIVESPEC); + if (need_more_output(fdc) == MORE_OUTPUT) { + output_byte(fdc, UNIT(drive)); + output_byte(fdc, 0xc0); } } break; @@ -1313,14 +1315,14 @@ static void fdc_specify(void) break; } - if (fdc_state[current_fdc].version >= FDC_82072) { + if (fdc_state[fdc].version >= FDC_82072) { scale_dtr = dtr; hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */ } /* Convert step rate from microseconds to milliseconds and 4 bits */ - srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000, + srt = 16 - DIV_ROUND_UP(drive_params[drive].srt * scale_dtr / 1000, NOMINAL_DTR); if (slow_floppy) srt = srt / 4; @@ -1328,14 +1330,14 @@ static void fdc_specify(void) SUPBOUND(srt, 0xf); INFBOUND(srt, 0); - hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2, + hlt = DIV_ROUND_UP(drive_params[drive].hlt * scale_dtr / 2, NOMINAL_DTR); if (hlt < 0x01) hlt = 0x01; else if (hlt > 0x7f) hlt = hlt_max_code; - hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16, + hut = DIV_ROUND_UP(drive_params[drive].hut * scale_dtr / 16, NOMINAL_DTR); if (hut < 0x1) hut = 0x1; @@ -1346,12 +1348,12 @@ static void fdc_specify(void) spec2 = (hlt << 1) | (use_virtual_dma & 1); /* If these parameters did not change, just return with success */ - if (fdc_state[current_fdc].spec1 != spec1 || - fdc_state[current_fdc].spec2 != spec2) { + if (fdc_state[fdc].spec1 != spec1 || + fdc_state[fdc].spec2 != spec2) { /* Go ahead and set spec1 and spec2 */ - output_byte(FD_SPECIFY); - output_byte(fdc_state[current_fdc].spec1 = spec1); - output_byte(fdc_state[current_fdc].spec2 = spec2); + output_byte(fdc, FD_SPECIFY); + output_byte(fdc, fdc_state[fdc].spec1 = spec1); + output_byte(fdc, fdc_state[fdc].spec2 = spec2); } } /* fdc_specify */ @@ -1513,7 +1515,7 @@ static void setup_rw_floppy(void) r = 0; for (i = 0; i < raw_cmd->cmd_count; i++) - r |= output_byte(raw_cmd->cmd[i]); + r |= output_byte(current_fdc, raw_cmd->fullcmd[i]); debugt(__func__, "rw_command"); @@ -1524,7 +1526,7 @@ static void setup_rw_floppy(void) } if (!(flags & FD_RAW_INTR)) { - inr = result(); + inr = result(current_fdc); cont->interrupt(); } else if (flags & FD_RAW_NEED_DISK) fd_watchdog(); @@ -1562,29 +1564,29 @@ static void seek_interrupt(void) floppy_ready(); } -static void check_wp(void) +static void check_wp(int fdc, int drive) { - if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { + if (test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) { /* check write protection */ - output_byte(FD_GETSTATUS); - output_byte(UNIT(current_drive)); - if (result() != 1) { - fdc_state[current_fdc].reset = 1; + output_byte(fdc, FD_GETSTATUS); + output_byte(fdc, UNIT(drive)); + if (result(fdc) != 1) { + fdc_state[fdc].reset = 1; return; } - clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); + clear_bit(FD_VERIFY_BIT, &drive_state[drive].flags); clear_bit(FD_NEED_TWADDLE_BIT, - &drive_state[current_drive].flags); - debug_dcl(drive_params[current_drive].flags, + &drive_state[drive].flags); + debug_dcl(drive_params[drive].flags, "checking whether disk is write protected\n"); - debug_dcl(drive_params[current_drive].flags, "wp=%x\n", + debug_dcl(drive_params[drive].flags, "wp=%x\n", reply_buffer[ST3] & 0x40); if (!(reply_buffer[ST3] & 0x40)) set_bit(FD_DISK_WRITABLE_BIT, - &drive_state[current_drive].flags); + &drive_state[drive].flags); else clear_bit(FD_DISK_WRITABLE_BIT, - &drive_state[current_drive].flags); + &drive_state[drive].flags); } } @@ -1628,7 +1630,7 @@ static void seek_floppy(void) track = 1; } } else { - check_wp(); + check_wp(current_fdc, current_drive); if (raw_cmd->track != drive_state[current_drive].track && (raw_cmd->flags & FD_RAW_NEED_SEEK)) track = raw_cmd->track; @@ -1639,9 +1641,9 @@ static void seek_floppy(void) } do_floppy = seek_interrupt; - output_byte(FD_SEEK); - output_byte(UNIT(current_drive)); - if (output_byte(track) < 0) { + output_byte(current_fdc, FD_SEEK); + output_byte(current_fdc, UNIT(current_drive)); + if (output_byte(current_fdc, track) < 0) { reset_fdc(); return; } @@ -1742,14 +1744,14 @@ irqreturn_t floppy_interrupt(int irq, void *dev_id) do_print = !handler && print_unex && initialized; - inr = result(); + inr = result(current_fdc); if (do_print) print_result("unexpected interrupt", inr); if (inr == 0) { int max_sensei = 4; do { - output_byte(FD_SENSEI); - inr = result(); + output_byte(current_fdc, FD_SENSEI); + inr = result(current_fdc); if (do_print) print_result("sensei", inr); max_sensei--; @@ -1771,8 +1773,8 @@ static void recalibrate_floppy(void) { debugt(__func__, ""); do_floppy = recal_interrupt; - output_byte(FD_RECALIBRATE); - if (output_byte(UNIT(current_drive)) < 0) + output_byte(current_fdc, FD_RECALIBRATE); + if (output_byte(current_fdc, UNIT(current_drive)) < 0) reset_fdc(); } @@ -1782,7 +1784,7 @@ static void recalibrate_floppy(void) static void reset_interrupt(void) { debugt(__func__, ""); - result(); /* get the status ready for set_fdc */ + result(current_fdc); /* get the status ready for set_fdc */ if (fdc_state[current_fdc].reset) { pr_info("reset set in interrupt, calling %ps\n", cont->error); cont->error(); /* a reset just after a reset. BAD! */ @@ -1792,7 +1794,9 @@ static void reset_interrupt(void) /* * reset is done by pulling bit 2 of DOR low for a while (old FDCs), - * or by setting the self clearing bit 7 of STATUS (newer FDCs) + * or by setting the self clearing bit 7 of STATUS (newer FDCs). + * This WILL trigger an interrupt, causing the handlers in the current + * cont's ->redo() to be called via reset_interrupt(). */ static void reset_fdc(void) { @@ -1800,7 +1804,7 @@ static void reset_fdc(void) do_floppy = reset_interrupt; fdc_state[current_fdc].reset = 0; - reset_fdc_info(0); + reset_fdc_info(current_fdc, 0); /* Pseudo-DMA may intercept 'reset finished' interrupt. */ /* Irrelevant for systems with true DMA (i386). */ @@ -1819,7 +1823,7 @@ static void reset_fdc(void) } } -static void show_floppy(void) +static void show_floppy(int fdc) { int i; @@ -1842,7 +1846,7 @@ static void show_floppy(void) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, reply_buffer, resultsize, true); - pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS)); + pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS)); pr_info("fdc_busy=%lu\n", fdc_busy); if (do_floppy) pr_info("do_floppy=%ps\n", do_floppy); @@ -1868,7 +1872,7 @@ static void floppy_shutdown(struct work_struct *arg) unsigned long flags; if (initialized) - show_floppy(); + show_floppy(current_fdc); cancel_activity(); flags = claim_dma_lock(); @@ -1934,7 +1938,7 @@ static void floppy_ready(void) "calling disk change from floppy_ready\n"); if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) && disk_change(current_drive) && !drive_params[current_drive].select_delay) - twaddle(); /* this clears the dcl on certain + twaddle(current_fdc, current_drive); /* this clears the dcl on certain * drive/controller combinations */ #ifdef fd_chose_dma_mode @@ -1946,20 +1950,20 @@ static void floppy_ready(void) #endif if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) { - perpendicular_mode(); - fdc_specify(); /* must be done here because of hut, hlt ... */ + perpendicular_mode(current_fdc); + fdc_specify(current_fdc, current_drive); /* must be done here because of hut, hlt ... */ seek_floppy(); } else { if ((raw_cmd->flags & FD_RAW_READ) || (raw_cmd->flags & FD_RAW_WRITE)) - fdc_specify(); + fdc_specify(current_fdc, current_drive); setup_rw_floppy(); } } static void floppy_start(void) { - reschedule_timeout(current_reqD, "floppy start"); + reschedule_timeout(current_drive, "floppy start"); scandrives(); debug_dcl(drive_params[current_drive].flags, @@ -2004,6 +2008,9 @@ static const struct cont_t intr_cont = { .done = (done_f)empty }; +/* schedules handler, waiting for completion. May be interrupted, will then + * return -EINTR, in which case the driver will automatically be unlocked. + */ static int wait_til_done(void (*handler)(void), bool interruptible) { int ret; @@ -2059,18 +2066,19 @@ static void success_and_wakeup(void) * ========================== */ -static int next_valid_format(void) +static int next_valid_format(int drive) { int probed_format; - probed_format = drive_state[current_drive].probed_format; + probed_format = drive_state[drive].probed_format; while (1) { - if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { - drive_state[current_drive].probed_format = 0; + if (probed_format >= FD_AUTODETECT_SIZE || + !drive_params[drive].autodetect[probed_format]) { + drive_state[drive].probed_format = 0; return 1; } - if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { - drive_state[current_drive].probed_format = probed_format; + if (floppy_type[drive_params[drive].autodetect[probed_format]].sect) { + drive_state[drive].probed_format = probed_format; return 0; } probed_format++; @@ -2083,7 +2091,7 @@ static void bad_flp_intr(void) if (probing) { drive_state[current_drive].probed_format++; - if (!next_valid_format()) + if (!next_valid_format(current_drive)) return; } err_count = ++(*errors); @@ -2843,6 +2851,9 @@ static int set_next_request(void) return current_req != NULL; } +/* Starts or continues processing request. Will automatically unlock the + * driver at end of request. + */ static void redo_fd_request(void) { int drive; @@ -2867,7 +2878,7 @@ do_request: } drive = (long)current_req->rq_disk->private_data; set_fdc(drive); - reschedule_timeout(current_reqD, "redo fd request"); + reschedule_timeout(current_drive, "redo fd request"); set_floppy(drive); raw_cmd = &default_raw_cmd; @@ -2885,7 +2896,7 @@ do_request: if (!_floppy) { /* Autodetection */ if (!probing) { drive_state[current_drive].probed_format = 0; - if (next_valid_format()) { + if (next_valid_format(current_drive)) { DPRINT("no autodetectable formats\n"); _floppy = NULL; request_done(0); @@ -2904,7 +2915,7 @@ do_request: } if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) - twaddle(); + twaddle(current_fdc, current_drive); schedule_bh(floppy_start); debugt(__func__, "queue fd request"); return; @@ -2917,6 +2928,7 @@ static const struct cont_t rw_cont = { .done = request_done }; +/* schedule the request and automatically unlock the driver on completion */ static void process_fd_request(void) { cont = &rw_cont; @@ -2938,17 +2950,17 @@ static blk_status_t floppy_queue_rq(struct blk_mq_hw_ctx *hctx, (unsigned long long) current_req->cmd_flags)) return BLK_STS_IOERR; - spin_lock_irq(&floppy_lock); - list_add_tail(&bd->rq->queuelist, &floppy_reqs); - spin_unlock_irq(&floppy_lock); - if (test_and_set_bit(0, &fdc_busy)) { /* fdc busy, this new request will be treated when the current one is done */ is_alive(__func__, "old request running"); - return BLK_STS_OK; + return BLK_STS_RESOURCE; } + spin_lock_irq(&floppy_lock); + list_add_tail(&bd->rq->queuelist, &floppy_reqs); + spin_unlock_irq(&floppy_lock); + command_status = FD_COMMAND_NONE; __reschedule_timeout(MAXTIMEOUT, "fd_request"); set_fdc(0); @@ -2996,6 +3008,10 @@ static const struct cont_t reset_cont = { .done = generic_done }; +/* + * Resets the FDC connected to drive <drive>. + * Both current_drive and current_fdc are changed to match the new drive. + */ static int user_reset_fdc(int drive, int arg, bool interruptible) { int ret; @@ -3006,6 +3022,9 @@ static int user_reset_fdc(int drive, int arg, bool interruptible) if (arg == FD_RESET_ALWAYS) fdc_state[current_fdc].reset = 1; if (fdc_state[current_fdc].reset) { + /* note: reset_fdc will take care of unlocking the driver + * on completion. + */ cont = &reset_cont; ret = wait_til_done(reset_fdc, interruptible); if (ret == -EINTR) @@ -3059,7 +3078,7 @@ static void raw_cmd_done(int flag) raw_cmd->flags |= FD_RAW_HARDFAILURE; } else { raw_cmd->reply_count = inr; - if (raw_cmd->reply_count > MAX_REPLIES) + if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE) raw_cmd->reply_count = 0; for (i = 0; i < raw_cmd->reply_count; i++) raw_cmd->reply[i] = reply_buffer[i]; @@ -3170,18 +3189,10 @@ loop: if (ret) return -EFAULT; param += sizeof(struct floppy_raw_cmd); - if (ptr->cmd_count > 33) - /* the command may now also take up the space - * initially intended for the reply & the - * reply count. Needed for long 82078 commands - * such as RESTORE, which takes ... 17 command - * bytes. Murphy's law #137: When you reserve - * 16 bytes for a structure, you'll one day - * discover that you really need 17... - */ + if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE) return -EINVAL; - for (i = 0; i < 16; i++) + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) ptr->reply[i] = 0; ptr->resultcode = 0; @@ -3423,13 +3434,13 @@ static int fd_getgeo(struct block_device *bdev, struct hd_geometry *geo) return 0; } -static bool valid_floppy_drive_params(const short autodetect[8], +static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE], int native_format) { size_t floppy_type_size = ARRAY_SIZE(floppy_type); size_t i = 0; - for (i = 0; i < 8; ++i) { + for (i = 0; i < FD_AUTODETECT_SIZE; ++i) { if (autodetect[i] < 0 || autodetect[i] >= floppy_type_size) return false; @@ -3610,7 +3621,7 @@ static int fd_locked_ioctl(struct block_device *bdev, fmode_t mode, unsigned int case FDTWADDLE: if (lock_fdc(drive)) return -EINTR; - twaddle(); + twaddle(current_fdc, current_drive); process_fd_request(); return 0; default: @@ -3654,7 +3665,7 @@ struct compat_floppy_drive_params { struct floppy_max_errors max_errors; char flags; char read_track; - short autodetect[8]; + short autodetect[FD_AUTODETECT_SIZE]; compat_int_t checkfreq; compat_int_t native_format; }; @@ -4298,79 +4309,79 @@ static const struct block_device_operations floppy_fops = { /* Determine the floppy disk controller type */ /* This routine was written by David C. Niemi */ -static char __init get_fdc_version(void) +static char __init get_fdc_version(int fdc) { int r; - output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ - if (fdc_state[current_fdc].reset) + output_byte(fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ + if (fdc_state[fdc].reset) return FDC_NONE; - r = result(); + r = result(fdc); if (r <= 0x00) return FDC_NONE; /* No FDC present ??? */ if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is an 8272A\n", current_fdc); + pr_info("FDC %d is an 8272A\n", fdc); return FDC_8272A; /* 8272a/765 don't know DUMPREGS */ } if (r != 10) { pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } - if (!fdc_configure()) { - pr_info("FDC %d is an 82072\n", current_fdc); + if (!fdc_configure(fdc)) { + pr_info("FDC %d is an 82072\n", fdc); return FDC_82072; /* 82072 doesn't know CONFIGURE */ } - output_byte(FD_PERPENDICULAR); - if (need_more_output() == MORE_OUTPUT) { - output_byte(0); + output_byte(fdc, FD_PERPENDICULAR); + if (need_more_output(fdc) == MORE_OUTPUT) { + output_byte(fdc, 0); } else { - pr_info("FDC %d is an 82072A\n", current_fdc); + pr_info("FDC %d is an 82072A\n", fdc); return FDC_82072A; /* 82072A as found on Sparcs. */ } - output_byte(FD_UNLOCK); - r = result(); + output_byte(fdc, FD_UNLOCK); + r = result(fdc); if ((r == 1) && (reply_buffer[0] == 0x80)) { - pr_info("FDC %d is a pre-1991 82077\n", current_fdc); + pr_info("FDC %d is a pre-1991 82077\n", fdc); return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know * LOCK/UNLOCK */ } if ((r != 1) || (reply_buffer[0] != 0x00)) { pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } - output_byte(FD_PARTID); - r = result(); + output_byte(fdc, FD_PARTID); + r = result(fdc); if (r != 1) { pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", - current_fdc, r); + fdc, r); return FDC_UNKNOWN; } if (reply_buffer[0] == 0x80) { - pr_info("FDC %d is a post-1991 82077\n", current_fdc); + pr_info("FDC %d is a post-1991 82077\n", fdc); return FDC_82077; /* Revised 82077AA passes all the tests */ } switch (reply_buffer[0] >> 5) { case 0x0: /* Either a 82078-1 or a 82078SL running at 5Volt */ - pr_info("FDC %d is an 82078.\n", current_fdc); + pr_info("FDC %d is an 82078.\n", fdc); return FDC_82078; case 0x1: - pr_info("FDC %d is a 44pin 82078\n", current_fdc); + pr_info("FDC %d is a 44pin 82078\n", fdc); return FDC_82078; case 0x2: - pr_info("FDC %d is a S82078B\n", current_fdc); + pr_info("FDC %d is a S82078B\n", fdc); return FDC_S82078B; case 0x3: - pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc); + pr_info("FDC %d is a National Semiconductor PC87306\n", fdc); return FDC_87306; default: pr_info("FDC %d init: 82078 variant with unknown PARTID=%d.\n", - current_fdc, reply_buffer[0] >> 5); + fdc, reply_buffer[0] >> 5); return FDC_82078_UNKN; } } /* get_fdc_version */ @@ -4534,11 +4545,13 @@ static void floppy_device_release(struct device *dev) static int floppy_resume(struct device *dev) { int fdc; + int saved_drive; + saved_drive = current_drive; for (fdc = 0; fdc < N_FDC; fdc++) if (fdc_state[fdc].address != -1) - user_reset_fdc(-1, FD_RESET_ALWAYS, false); - + user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false); + set_fdc(saved_drive); return 0; } @@ -4646,16 +4659,15 @@ static int __init do_floppy_init(void) config_types(); for (i = 0; i < N_FDC; i++) { - current_fdc = i; - memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state)); - fdc_state[current_fdc].dtr = -1; - fdc_state[current_fdc].dor = 0x4; + memset(&fdc_state[i], 0, sizeof(*fdc_state)); + fdc_state[i].dtr = -1; + fdc_state[i].dor = 0x4; #if defined(__sparc__) || defined(__mc68000__) /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ #ifdef __mc68000__ if (MACH_IS_SUN3X) #endif - fdc_state[current_fdc].version = FDC_82072A; + fdc_state[i].version = FDC_82072A; #endif } @@ -4697,30 +4709,29 @@ static int __init do_floppy_init(void) msleep(10); for (i = 0; i < N_FDC; i++) { - current_fdc = i; - fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION; + fdc_state[i].driver_version = FD_DRIVER_VERSION; for (unit = 0; unit < 4; unit++) - fdc_state[current_fdc].track[unit] = 0; - if (fdc_state[current_fdc].address == -1) + fdc_state[i].track[unit] = 0; + if (fdc_state[i].address == -1) continue; - fdc_state[current_fdc].rawcmd = 2; - if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { + fdc_state[i].rawcmd = 2; + if (user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false)) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(current_fdc); - fdc_state[current_fdc].address = -1; - fdc_state[current_fdc].version = FDC_NONE; + floppy_release_regions(i); + fdc_state[i].address = -1; + fdc_state[i].version = FDC_NONE; continue; } /* Try to determine the floppy controller type */ - fdc_state[current_fdc].version = get_fdc_version(); - if (fdc_state[current_fdc].version == FDC_NONE) { + fdc_state[i].version = get_fdc_version(i); + if (fdc_state[i].version == FDC_NONE) { /* free ioports reserved by floppy_grab_irq_and_dma() */ - floppy_release_regions(current_fdc); - fdc_state[current_fdc].address = -1; + floppy_release_regions(i); + fdc_state[i].address = -1; continue; } if (can_use_virtual_dma == 2 && - fdc_state[current_fdc].version < FDC_82072A) + fdc_state[i].version < FDC_82072A) can_use_virtual_dma = 0; have_no_fdc = 0; @@ -4728,7 +4739,7 @@ static int __init do_floppy_init(void) * properly, so force a reset for the standard FDC clones, * to avoid interrupt garbage. */ - user_reset_fdc(-1, FD_RESET_ALWAYS, false); + user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false); } current_fdc = 0; cancel_delayed_work(&fd_timeout); @@ -4855,6 +4866,8 @@ static void floppy_release_regions(int fdc) static int floppy_grab_irq_and_dma(void) { + int fdc; + if (atomic_inc_return(&usage_count) > 1) return 0; @@ -4882,24 +4895,24 @@ static int floppy_grab_irq_and_dma(void) } } - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { - if (fdc_state[current_fdc].address != -1) { - if (floppy_request_regions(current_fdc)) + for (fdc = 0; fdc < N_FDC; fdc++) { + if (fdc_state[fdc].address != -1) { + if (floppy_request_regions(fdc)) goto cleanup; } } - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { - if (fdc_state[current_fdc].address != -1) { - reset_fdc_info(1); - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); + for (fdc = 0; fdc < N_FDC; fdc++) { + if (fdc_state[fdc].address != -1) { + reset_fdc_info(fdc, 1); + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); } } - current_fdc = 0; + set_dor(0, ~0, 8); /* avoid immediate interrupt */ - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) - if (fdc_state[current_fdc].address != -1) - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); + for (fdc = 0; fdc < N_FDC; fdc++) + if (fdc_state[fdc].address != -1) + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); /* * The driver will try and free resources and relies on us * to know if they were allocated or not. @@ -4910,15 +4923,16 @@ static int floppy_grab_irq_and_dma(void) cleanup: fd_free_irq(); fd_free_dma(); - while (--current_fdc >= 0) - floppy_release_regions(current_fdc); + while (--fdc >= 0) + floppy_release_regions(fdc); + current_fdc = 0; atomic_dec(&usage_count); return -1; } static void floppy_release_irq_and_dma(void) { - int old_fdc; + int fdc; #ifndef __sparc__ int drive; #endif @@ -4959,11 +4973,9 @@ static void floppy_release_irq_and_dma(void) pr_info("auxiliary floppy timer still active\n"); if (work_pending(&floppy_work)) pr_info("work still pending\n"); - old_fdc = current_fdc; - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) - if (fdc_state[current_fdc].address != -1) - floppy_release_regions(current_fdc); - current_fdc = old_fdc; + for (fdc = 0; fdc < N_FDC; fdc++) + if (fdc_state[fdc].address != -1) + floppy_release_regions(fdc); } #ifdef MODULE diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 13dbe2f16882..4212288ab157 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -228,26 +228,36 @@ static void __loop_update_dio(struct loop_device *lo, bool dio) blk_mq_unfreeze_queue(lo->lo_queue); } +/** + * loop_validate_block_size() - validates the passed in block size + * @bsize: size to validate + */ static int -figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) +loop_validate_block_size(unsigned short bsize) { - loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); - sector_t x = (sector_t)size; - struct block_device *bdev = lo->lo_device; + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) + return -EINVAL; - if (unlikely((loff_t)x != size)) - return -EFBIG; - if (lo->lo_offset != offset) - lo->lo_offset = offset; - if (lo->lo_sizelimit != sizelimit) - lo->lo_sizelimit = sizelimit; - set_capacity(lo->lo_disk, x); - bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); return 0; } +/** + * loop_set_size() - sets device size and notifies userspace + * @lo: struct loop_device to set the size for + * @size: new size of the loop device + * + * Callers must validate that the size passed into this function fits into + * a sector_t, eg using loop_validate_size() + */ +static void loop_set_size(struct loop_device *lo, loff_t size) +{ + struct block_device *bdev = lo->lo_device; + + bd_set_size(bdev, size << SECTOR_SHIFT); + + set_capacity_revalidate_and_notify(lo->lo_disk, size, false); +} + static inline int lo_do_transfer(struct loop_device *lo, int cmd, struct page *rpage, unsigned roffs, @@ -952,23 +962,125 @@ static void loop_update_rotational(struct loop_device *lo) blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); } -static int loop_set_fd(struct loop_device *lo, fmode_t mode, - struct block_device *bdev, unsigned int arg) +static int +loop_release_xfer(struct loop_device *lo) +{ + int err = 0; + struct loop_func_table *xfer = lo->lo_encryption; + + if (xfer) { + if (xfer->release) + err = xfer->release(lo); + lo->transfer = NULL; + lo->lo_encryption = NULL; + module_put(xfer->owner); + } + return err; +} + +static int +loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, + const struct loop_info64 *i) +{ + int err = 0; + + if (xfer) { + struct module *owner = xfer->owner; + + if (!try_module_get(owner)) + return -EINVAL; + if (xfer->init) + err = xfer->init(lo, i); + if (err) + module_put(owner); + else + lo->lo_encryption = xfer; + } + return err; +} + +/** + * loop_set_status_from_info - configure device from loop_info + * @lo: struct loop_device to configure + * @info: struct loop_info64 to configure the device with + * + * Configures the loop device parameters according to the passed + * in loop_info64 configuration. + */ +static int +loop_set_status_from_info(struct loop_device *lo, + const struct loop_info64 *info) +{ + int err; + struct loop_func_table *xfer; + kuid_t uid = current_uid(); + + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) + return -EINVAL; + + err = loop_release_xfer(lo); + if (err) + return err; + + if (info->lo_encrypt_type) { + unsigned int type = info->lo_encrypt_type; + + if (type >= MAX_LO_CRYPT) + return -EINVAL; + xfer = xfer_funcs[type]; + if (xfer == NULL) + return -EINVAL; + } else + xfer = NULL; + + err = loop_init_xfer(lo, xfer, info); + if (err) + return err; + + lo->lo_offset = info->lo_offset; + lo->lo_sizelimit = info->lo_sizelimit; + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); + lo->lo_file_name[LO_NAME_SIZE-1] = 0; + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; + + if (!xfer) + xfer = &none_funcs; + lo->transfer = xfer->transfer; + lo->ioctl = xfer->ioctl; + + lo->lo_flags = info->lo_flags; + + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; + lo->lo_init[0] = info->lo_init[0]; + lo->lo_init[1] = info->lo_init[1]; + if (info->lo_encrypt_key_size) { + memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, + info->lo_encrypt_key_size); + lo->lo_key_owner = uid; + } + + return 0; +} + +static int loop_configure(struct loop_device *lo, fmode_t mode, + struct block_device *bdev, + const struct loop_config *config) { struct file *file; struct inode *inode; struct address_space *mapping; struct block_device *claimed_bdev = NULL; - int lo_flags = 0; int error; loff_t size; bool partscan; + unsigned short bsize; /* This is safe, since we have a reference from open(). */ __module_get(THIS_MODULE); error = -EBADF; - file = fget(arg); + file = fget(config->fd); if (!file) goto out; @@ -977,7 +1089,7 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, * here to avoid changing device under exclusive owner. */ if (!(mode & FMODE_EXCL)) { - claimed_bdev = bd_start_claiming(bdev, loop_set_fd); + claimed_bdev = bd_start_claiming(bdev, loop_configure); if (IS_ERR(claimed_bdev)) { error = PTR_ERR(claimed_bdev); goto out_putf; @@ -999,52 +1111,58 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, mapping = file->f_mapping; inode = mapping->host; + size = get_loop_size(lo, file); + + if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) { + error = -EINVAL; + goto out_unlock; + } + + if (config->block_size) { + error = loop_validate_block_size(config->block_size); + if (error) + goto out_unlock; + } + + error = loop_set_status_from_info(lo, &config->info); + if (error) + goto out_unlock; + if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || !file->f_op->write_iter) - lo_flags |= LO_FLAGS_READ_ONLY; + lo->lo_flags |= LO_FLAGS_READ_ONLY; - error = -EFBIG; - size = get_loop_size(lo, file); - if ((loff_t)(sector_t)size != size) - goto out_unlock; error = loop_prepare_queue(lo); if (error) goto out_unlock; - error = 0; - - set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); + set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); - lo->use_dio = false; + lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; lo->lo_device = bdev; - lo->lo_flags = lo_flags; lo->lo_backing_file = file; - lo->transfer = NULL; - lo->ioctl = NULL; - lo->lo_sizelimit = 0; lo->old_gfp_mask = mapping_gfp_mask(mapping); mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); - if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) + if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) blk_queue_write_cache(lo->lo_queue, true, false); - if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { + if (config->block_size) + bsize = config->block_size; + else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) /* In case of direct I/O, match underlying block size */ - unsigned short bsize = bdev_logical_block_size( - inode->i_sb->s_bdev); + bsize = bdev_logical_block_size(inode->i_sb->s_bdev); + else + bsize = 512; - blk_queue_logical_block_size(lo->lo_queue, bsize); - blk_queue_physical_block_size(lo->lo_queue, bsize); - blk_queue_io_min(lo->lo_queue, bsize); - } + blk_queue_logical_block_size(lo->lo_queue, bsize); + blk_queue_physical_block_size(lo->lo_queue, bsize); + blk_queue_io_min(lo->lo_queue, bsize); loop_update_rotational(lo); loop_update_dio(lo); - set_capacity(lo->lo_disk, size); - bd_set_size(bdev, size << 9); loop_sysfs_init(lo); - /* let user-space know about the new size */ - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); + loop_set_size(lo, size); set_blocksize(bdev, S_ISBLK(inode->i_mode) ? block_size(inode->i_bdev) : PAGE_SIZE); @@ -1062,14 +1180,14 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, if (partscan) loop_reread_partitions(lo, bdev); if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); + bd_abort_claiming(bdev, claimed_bdev, loop_configure); return 0; out_unlock: mutex_unlock(&loop_ctl_mutex); out_bdev: if (claimed_bdev) - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); + bd_abort_claiming(bdev, claimed_bdev, loop_configure); out_putf: fput(file); out: @@ -1078,43 +1196,6 @@ out: return error; } -static int -loop_release_xfer(struct loop_device *lo) -{ - int err = 0; - struct loop_func_table *xfer = lo->lo_encryption; - - if (xfer) { - if (xfer->release) - err = xfer->release(lo); - lo->transfer = NULL; - lo->lo_encryption = NULL; - module_put(xfer->owner); - } - return err; -} - -static int -loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, - const struct loop_info64 *i) -{ - int err = 0; - - if (xfer) { - struct module *owner = xfer->owner; - - if (!try_module_get(owner)) - return -EINVAL; - if (xfer->init) - err = xfer->init(lo, i); - if (err) - module_put(owner); - else - lo->lo_encryption = xfer; - } - return err; -} - static int __loop_clr_fd(struct loop_device *lo, bool release) { struct file *filp = NULL; @@ -1263,10 +1344,11 @@ static int loop_set_status(struct loop_device *lo, const struct loop_info64 *info) { int err; - struct loop_func_table *xfer; - kuid_t uid = current_uid(); struct block_device *bdev; + kuid_t uid = current_uid(); + int prev_lo_flags; bool partscan = false; + bool size_changed = false; err = mutex_lock_killable(&loop_ctl_mutex); if (err) @@ -1281,13 +1363,10 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) err = -ENXIO; goto out_unlock; } - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { - err = -EINVAL; - goto out_unlock; - } if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + size_changed = true; sync_blockdev(lo->lo_device); kill_bdev(lo->lo_device); } @@ -1295,79 +1374,44 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); - err = loop_release_xfer(lo); - if (err) + if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { + /* If any pages were dirtied after kill_bdev(), try again */ + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); goto out_unfreeze; + } - if (info->lo_encrypt_type) { - unsigned int type = info->lo_encrypt_type; - - if (type >= MAX_LO_CRYPT) { - err = -EINVAL; - goto out_unfreeze; - } - xfer = xfer_funcs[type]; - if (xfer == NULL) { - err = -EINVAL; - goto out_unfreeze; - } - } else - xfer = NULL; + prev_lo_flags = lo->lo_flags; - err = loop_init_xfer(lo, xfer, info); + err = loop_set_status_from_info(lo, info); if (err) goto out_unfreeze; - if (lo->lo_offset != info->lo_offset || - lo->lo_sizelimit != info->lo_sizelimit) { - /* kill_bdev should have truncated all the pages */ - if (lo->lo_device->bd_inode->i_mapping->nrpages) { - err = -EAGAIN; - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", - __func__, lo->lo_number, lo->lo_file_name, - lo->lo_device->bd_inode->i_mapping->nrpages); - goto out_unfreeze; - } - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { - err = -EFBIG; - goto out_unfreeze; - } + /* Mask out flags that can't be set using LOOP_SET_STATUS. */ + lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS; + /* For those flags, use the previous values instead */ + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; + /* For flags that can't be cleared, use previous values too */ + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS; + + if (size_changed) { + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, + lo->lo_backing_file); + loop_set_size(lo, new_size); } loop_config_discard(lo); - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); - memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); - lo->lo_file_name[LO_NAME_SIZE-1] = 0; - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; - - if (!xfer) - xfer = &none_funcs; - lo->transfer = xfer->transfer; - lo->ioctl = xfer->ioctl; - - if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != - (info->lo_flags & LO_FLAGS_AUTOCLEAR)) - lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; - - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; - lo->lo_init[0] = info->lo_init[0]; - lo->lo_init[1] = info->lo_init[1]; - if (info->lo_encrypt_key_size) { - memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, - info->lo_encrypt_key_size); - lo->lo_key_owner = uid; - } - /* update dio if lo_offset or transfer is changed */ __loop_update_dio(lo, lo->use_dio); out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { - lo->lo_flags |= LO_FLAGS_PARTSCAN; + if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && + !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; bdev = lo->lo_device; partscan = true; @@ -1531,10 +1575,15 @@ loop_get_status64(struct loop_device *lo, struct loop_info64 __user *arg) { static int loop_set_capacity(struct loop_device *lo) { + loff_t size; + if (unlikely(lo->lo_state != Lo_bound)) return -ENXIO; - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); + size = get_loop_size(lo, lo->lo_backing_file); + loop_set_size(lo, size); + + return 0; } static int loop_set_dio(struct loop_device *lo, unsigned long arg) @@ -1558,8 +1607,9 @@ static int loop_set_block_size(struct loop_device *lo, unsigned long arg) if (lo->lo_state != Lo_bound) return -ENXIO; - if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) - return -EINVAL; + err = loop_validate_block_size(arg); + if (err) + return err; if (lo->lo_queue->limits.logical_block_size == arg) return 0; @@ -1617,11 +1667,31 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg) { struct loop_device *lo = bdev->bd_disk->private_data; + void __user *argp = (void __user *) arg; int err; switch (cmd) { - case LOOP_SET_FD: - return loop_set_fd(lo, mode, bdev, arg); + case LOOP_SET_FD: { + /* + * Legacy case - pass in a zeroed out struct loop_config with + * only the file descriptor set , which corresponds with the + * default parameters we'd have used otherwise. + */ + struct loop_config config; + + memset(&config, 0, sizeof(config)); + config.fd = arg; + + return loop_configure(lo, mode, bdev, &config); + } + case LOOP_CONFIGURE: { + struct loop_config config; + + if (copy_from_user(&config, argp, sizeof(config))) + return -EFAULT; + + return loop_configure(lo, mode, bdev, &config); + } case LOOP_CHANGE_FD: return loop_change_fd(lo, bdev, arg); case LOOP_CLR_FD: @@ -1629,21 +1699,19 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_SET_STATUS: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = loop_set_status_old(lo, - (struct loop_info __user *)arg); + err = loop_set_status_old(lo, argp); } break; case LOOP_GET_STATUS: - return loop_get_status_old(lo, (struct loop_info __user *) arg); + return loop_get_status_old(lo, argp); case LOOP_SET_STATUS64: err = -EPERM; if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { - err = loop_set_status64(lo, - (struct loop_info64 __user *) arg); + err = loop_set_status64(lo, argp); } break; case LOOP_GET_STATUS64: - return loop_get_status64(lo, (struct loop_info64 __user *) arg); + return loop_get_status64(lo, argp); case LOOP_SET_CAPACITY: case LOOP_SET_DIRECT_IO: case LOOP_SET_BLOCK_SIZE: @@ -1795,6 +1863,7 @@ static int lo_compat_ioctl(struct block_device *bdev, fmode_t mode, case LOOP_CLR_FD: case LOOP_GET_STATUS64: case LOOP_SET_STATUS64: + case LOOP_CONFIGURE: arg = (unsigned long) compat_ptr(arg); /* fall through */ case LOOP_SET_FD: diff --git a/drivers/block/swim.c b/drivers/block/swim.c index 4c297f69171d..dd34504382e5 100644 --- a/drivers/block/swim.c +++ b/drivers/block/swim.c @@ -327,7 +327,7 @@ static inline void swim_motor(struct swim __iomem *base, swim_select(base, RELAX); if (swim_readbit(base, MOTOR_ON)) break; - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } } else if (action == OFF) { @@ -346,7 +346,7 @@ static inline void swim_eject(struct swim __iomem *base) swim_select(base, RELAX); if (!swim_readbit(base, DISK_IN)) break; - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); } swim_select(base, RELAX); @@ -370,7 +370,7 @@ static inline int swim_step(struct swim __iomem *base) for (wait = 0; wait < HZ; wait++) { - current->state = TASK_INTERRUPTIBLE; + set_current_state(TASK_INTERRUPTIBLE); schedule_timeout(1); swim_select(base, RELAX); diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index 6dfa653d30db..bf7dd96db9b3 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -26,3 +26,12 @@ config BCACHE_CLOSURES_DEBUG Keeps all active closures in a linked list and provides a debugfs interface to list them, which makes it possible to see asynchronous operations that get stuck. + +config BCACHE_ASYNC_REGISTRAION + bool "Asynchronous device registration (EXPERIMENTAL)" + depends on BCACHE + help + Add a sysfs file /sys/fs/bcache/register_async. Writing registering + device path into this file will returns immediately and the real + registration work is handled in kernel work queue in asynchronous + way. diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 74a9849ea164..221e0191b687 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -176,7 +176,7 @@ * - updates to non leaf nodes just happen synchronously (see btree_split()). */ -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ #include <linux/bcache.h> #include <linux/bio.h> diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 4385303836d8..4995fcaefe29 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -6,7 +6,7 @@ * Copyright 2012 Google, Inc. */ -#define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ +#define pr_fmt(fmt) "bcache: %s() " fmt, __func__ #include "util.h" #include "bset.h" @@ -31,7 +31,7 @@ void bch_dump_bset(struct btree_keys *b, struct bset *i, unsigned int set) if (b->ops->key_dump) b->ops->key_dump(b, k); else - pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); + pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); if (next < bset_bkey_last(i) && bkey_cmp(k, b->ops->is_extents ? @@ -1225,7 +1225,7 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out, out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; - pr_debug("sorted %i keys", out->keys); + pr_debug("sorted %i keys\n", out->keys); } static void __btree_sort(struct btree_keys *b, struct btree_iter *iter, diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 72856e5f23a3..39de94edd73a 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -619,7 +619,7 @@ retry: * and BTREE_NODE_journal_flush bit cleared by btree_flush_write(). */ if (btree_node_journal_flush(b)) { - pr_debug("bnode %p is flushing by journal, retry", b); + pr_debug("bnode %p is flushing by journal, retry\n", b); mutex_unlock(&b->write_lock); udelay(1); goto retry; @@ -802,7 +802,7 @@ int bch_btree_cache_alloc(struct cache_set *c) c->shrink.batch = c->btree_pages * 2; if (register_shrinker(&c->shrink)) - pr_warn("bcache: %s: could not register shrinker", + pr_warn("bcache: %s: could not register shrinker\n", __func__); return 0; @@ -1054,7 +1054,7 @@ retry: */ if (btree_node_journal_flush(b)) { mutex_unlock(&b->write_lock); - pr_debug("bnode %p journal_flush set, retry", b); + pr_debug("bnode %p journal_flush set, retry\n", b); udelay(1); goto retry; } @@ -1798,7 +1798,7 @@ static void bch_btree_gc(struct cache_set *c) schedule_timeout_interruptible(msecs_to_jiffies (GC_SLEEP_MS)); else if (ret) - pr_warn("gc failed!"); + pr_warn("gc failed!\n"); } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); bch_btree_gc_finish(c); @@ -1907,10 +1907,8 @@ static int bch_btree_check_thread(void *arg) struct btree_iter iter; struct bkey *k, *p; int cur_idx, prev_idx, skip_nr; - int i, n; k = p = NULL; - i = n = 0; cur_idx = prev_idx = 0; ret = 0; @@ -2045,7 +2043,7 @@ int bch_btree_check(struct cache_set *c) &check_state->infos[i], name); if (IS_ERR(check_state->infos[i].thread)) { - pr_err("fails to run thread bch_btrchk[%d]", i); + pr_err("fails to run thread bch_btrchk[%d]\n", i); for (--i; i >= 0; i--) kthread_stop(check_state->infos[i].thread); ret = -ENOMEM; @@ -2456,7 +2454,7 @@ int bch_btree_insert(struct cache_set *c, struct keylist *keys, if (ret) { struct bkey *k; - pr_err("error %i", ret); + pr_err("error %i\n", ret); while ((k = bch_keylist_pop(keys))) bkey_put(c, k); @@ -2744,7 +2742,7 @@ struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *c, break; if (bkey_cmp(&buf->last_scanned, end) >= 0) { - pr_debug("scan finished"); + pr_debug("scan finished\n"); break; } diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c index 886710043025..9162af5bb6ec 100644 --- a/drivers/md/bcache/extents.c +++ b/drivers/md/bcache/extents.c @@ -130,18 +130,18 @@ static void bch_bkey_dump(struct btree_keys *keys, const struct bkey *k) char buf[80]; bch_extent_to_text(buf, sizeof(buf), k); - pr_err(" %s", buf); + pr_cont(" %s", buf); for (j = 0; j < KEY_PTRS(k); j++) { size_t n = PTR_BUCKET_NR(b->c, k, j); - pr_err(" bucket %zu", n); + pr_cont(" bucket %zu", n); if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) - pr_err(" prio %i", - PTR_BUCKET(b->c, k, j)->prio); + pr_cont(" prio %i", + PTR_BUCKET(b->c, k, j)->prio); } - pr_err(" %s\n", bch_ptr_status(b->c, k)); + pr_cont(" %s\n", bch_ptr_status(b->c, k)); } /* Btree ptrs */ @@ -553,7 +553,7 @@ static bool bch_extent_bad(struct btree_keys *bk, const struct bkey *k) if (stale && KEY_DIRTY(k)) { bch_extent_to_text(buf, sizeof(buf), k); - pr_info("stale dirty pointer, stale %u, key: %s", + pr_info("stale dirty pointer, stale %u, key: %s\n", stale, buf); } diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 4d93f07f63e5..b25ee33b0d0b 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -65,14 +65,14 @@ void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio) * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. */ if (bio->bi_opf & REQ_RAHEAD) { - pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore", + pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", dc->backing_dev_name); return; } errors = atomic_add_return(1, &dc->io_errors); if (errors < dc->error_limit) - pr_err("%s: IO error on backing device, unrecoverable", + pr_err("%s: IO error on backing device, unrecoverable\n", dc->backing_dev_name); else bch_cached_dev_error(dc); @@ -123,12 +123,12 @@ void bch_count_io_errors(struct cache *ca, errors >>= IO_ERROR_SHIFT; if (errors < ca->set->error_limit) - pr_err("%s: IO error on %s%s", + pr_err("%s: IO error on %s%s\n", ca->cache_dev_name, m, is_read ? ", recovering." : "."); else bch_cache_set_error(ca->set, - "%s: too many IO errors %s", + "%s: too many IO errors %s\n", ca->cache_dev_name, m); } } diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c index 0e3ff9745ac7..90aac4e2333f 100644 --- a/drivers/md/bcache/journal.c +++ b/drivers/md/bcache/journal.c @@ -47,7 +47,7 @@ static int journal_read_bucket(struct cache *ca, struct list_head *list, closure_init_stack(&cl); - pr_debug("reading %u", bucket_index); + pr_debug("reading %u\n", bucket_index); while (offset < ca->sb.bucket_size) { reread: left = ca->sb.bucket_size - offset; @@ -78,13 +78,13 @@ reread: left = ca->sb.bucket_size - offset; size_t blocks, bytes = set_bytes(j); if (j->magic != jset_magic(&ca->sb)) { - pr_debug("%u: bad magic", bucket_index); + pr_debug("%u: bad magic\n", bucket_index); return ret; } if (bytes > left << 9 || bytes > PAGE_SIZE << JSET_BITS) { - pr_info("%u: too big, %zu bytes, offset %u", + pr_info("%u: too big, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } @@ -93,7 +93,7 @@ reread: left = ca->sb.bucket_size - offset; goto reread; if (j->csum != csum_set(j)) { - pr_info("%u: bad csum, %zu bytes, offset %u", + pr_info("%u: bad csum, %zu bytes, offset %u\n", bucket_index, bytes, offset); return ret; } @@ -190,7 +190,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) uint64_t seq; bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); - pr_debug("%u journal buckets", ca->sb.njournal_buckets); + pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); /* * Read journal buckets ordered by golden ratio hash to quickly @@ -215,7 +215,7 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) * If that fails, check all the buckets we haven't checked * already */ - pr_debug("falling back to linear search"); + pr_debug("falling back to linear search\n"); for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); l < ca->sb.njournal_buckets; @@ -233,7 +233,7 @@ bsearch: /* Binary search */ m = l; r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); - pr_debug("starting binary search, l %u r %u", l, r); + pr_debug("starting binary search, l %u r %u\n", l, r); while (l + 1 < r) { seq = list_entry(list->prev, struct journal_replay, @@ -253,7 +253,7 @@ bsearch: * Read buckets in reverse order until we stop finding more * journal entries */ - pr_debug("finishing up: m %u njournal_buckets %u", + pr_debug("finishing up: m %u njournal_buckets %u\n", m, ca->sb.njournal_buckets); l = m; @@ -370,10 +370,10 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) if (n != i->j.seq) { if (n == start && is_discard_enabled(s)) - pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", + pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); else { - pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", + pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", n, i->j.seq - 1, start, end); ret = -EIO; goto err; @@ -403,7 +403,7 @@ int bch_journal_replay(struct cache_set *s, struct list_head *list) entries++; } - pr_info("journal replay done, %i keys in %i entries, seq %llu", + pr_info("journal replay done, %i keys in %i entries, seq %llu\n", keys, entries, end); err: while (!list_empty(list)) { @@ -481,7 +481,7 @@ static void btree_flush_write(struct cache_set *c) break; if (btree_node_journal_flush(b)) - pr_err("BUG: flush_write bit should not be set here!"); + pr_err("BUG: flush_write bit should not be set here!\n"); mutex_lock(&b->write_lock); @@ -534,13 +534,13 @@ static void btree_flush_write(struct cache_set *c) for (i = 0; i < nr; i++) { b = btree_nodes[i]; if (!b) { - pr_err("BUG: btree_nodes[%d] is NULL", i); + pr_err("BUG: btree_nodes[%d] is NULL\n", i); continue; } /* safe to check without holding b->write_lock */ if (!btree_node_journal_flush(b)) { - pr_err("BUG: bnode %p: journal_flush bit cleaned", b); + pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b); continue; } @@ -548,14 +548,14 @@ static void btree_flush_write(struct cache_set *c) if (!btree_current_write(b)->journal) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: written by others", b); + pr_debug("bnode %p: written by others\n", b); continue; } if (!btree_node_dirty(b)) { clear_bit(BTREE_NODE_journal_flush, &b->flags); mutex_unlock(&b->write_lock); - pr_debug("bnode %p: dirty bit cleaned by others", b); + pr_debug("bnode %p: dirty bit cleaned by others\n", b); continue; } @@ -716,7 +716,7 @@ void bch_journal_next(struct journal *j) j->cur->data->keys = 0; if (fifo_full(&j->pin)) - pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); + pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin)); } static void journal_write_endio(struct bio *bio) diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 22b483527176..7acf024e99f3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -110,7 +110,7 @@ static void bch_data_invalidate(struct closure *cl) struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); struct bio *bio = op->bio; - pr_debug("invalidating %i sectors from %llu", + pr_debug("invalidating %i sectors from %llu\n", bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector); while (bio_sectors(bio)) { @@ -396,7 +396,7 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { - pr_debug("skipping unaligned io"); + pr_debug("skipping unaligned io\n"); goto skip; } @@ -650,7 +650,7 @@ static void backing_request_endio(struct bio *bio) */ if (unlikely(s->iop.writeback && bio->bi_opf & REQ_PREFLUSH)) { - pr_err("Can't flush %s: returned bi_status %i", + pr_err("Can't flush %s: returned bi_status %i\n", dc->backing_dev_name, bio->bi_status); } else { /* set to orig_bio->bi_status in bio_complete() */ diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index d98354fa28e3..f9975c22bf7e 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -89,7 +89,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, for (i = 0; i < SB_JOURNAL_BUCKETS; i++) sb->d[i] = le64_to_cpu(s->d[i]); - pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", + pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n", sb->version, sb->flags, sb->seq, sb->keys); err = "Not a bcache superblock (bad offset)"; @@ -234,7 +234,7 @@ static void __write_super(struct cache_sb *sb, struct cache_sb_disk *out, out->csum = csum_set(out); - pr_debug("ver %llu, flags %llu, seq %llu", + pr_debug("ver %llu, flags %llu, seq %llu\n", sb->version, sb->flags, sb->seq); submit_bio(bio); @@ -365,11 +365,11 @@ static void uuid_io(struct cache_set *c, int op, unsigned long op_flags, } bch_extent_to_text(buf, sizeof(buf), k); - pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf); + pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) if (!bch_is_zero(u->uuid, 16)) - pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", + pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n", u - c->uuids, u->uuid, u->label, u->first_reg, u->last_reg, u->invalidated); @@ -534,7 +534,7 @@ int bch_prio_write(struct cache *ca, bool wait) struct bucket *b; struct closure cl; - pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu", + pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n", fifo_used(&ca->free[RESERVE_PRIO]), fifo_used(&ca->free[RESERVE_NONE]), fifo_used(&ca->free_inc)); @@ -629,12 +629,12 @@ static int prio_read(struct cache *ca, uint64_t bucket) if (p->csum != bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { - pr_warn("bad csum reading priorities"); + pr_warn("bad csum reading priorities\n"); goto out; } if (p->magic != pset_magic(&ca->sb)) { - pr_warn("bad magic reading priorities"); + pr_warn("bad magic reading priorities\n"); goto out; } @@ -728,11 +728,11 @@ static void bcache_device_link(struct bcache_device *d, struct cache_set *c, ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); if (ret < 0) - pr_err("Couldn't create device -> cache set symlink"); + pr_err("Couldn't create device -> cache set symlink\n"); ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); if (ret < 0) - pr_err("Couldn't create cache set -> device symlink"); + pr_err("Couldn't create cache set -> device symlink\n"); clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); } @@ -789,15 +789,17 @@ static void bcache_device_free(struct bcache_device *d) lockdep_assert_held(&bch_register_lock); if (disk) - pr_info("%s stopped", disk->disk_name); + pr_info("%s stopped\n", disk->disk_name); else - pr_err("bcache device (NULL gendisk) stopped"); + pr_err("bcache device (NULL gendisk) stopped\n"); if (d->c) bcache_device_detach(d); if (disk) { - if (disk->flags & GENHD_FL_UP) + bool disk_added = (disk->flags & GENHD_FL_UP) != 0; + + if (disk_added) del_gendisk(disk); if (disk->queue) @@ -805,7 +807,8 @@ static void bcache_device_free(struct bcache_device *d) ida_simple_remove(&bcache_device_idx, first_minor_to_idx(disk->first_minor)); - put_disk(disk); + if (disk_added) + put_disk(disk); } bioset_exit(&d->bio_split); @@ -830,7 +833,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size, d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); if (!d->nr_stripes || d->nr_stripes > max_stripes) { - pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n", (unsigned int)d->nr_stripes); return -ENOMEM; } @@ -928,11 +931,11 @@ static int cached_dev_status_update(void *arg) dc->offline_seconds = 0; if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { - pr_err("%s: device offline for %d seconds", + pr_err("%s: device offline for %d seconds\n", dc->backing_dev_name, BACKING_DEV_OFFLINE_TIMEOUT); - pr_err("%s: disable I/O request due to backing " - "device offline", dc->disk.name); + pr_err("%s: disable I/O request due to backing device offline\n", + dc->disk.name); dc->io_disable = true; /* let others know earlier that io_disable is true */ smp_mb(); @@ -959,7 +962,7 @@ int bch_cached_dev_run(struct cached_dev *dc) }; if (dc->io_disable) { - pr_err("I/O disabled on cached dev %s", + pr_err("I/O disabled on cached dev %s\n", dc->backing_dev_name); kfree(env[1]); kfree(env[2]); @@ -971,7 +974,7 @@ int bch_cached_dev_run(struct cached_dev *dc) kfree(env[1]); kfree(env[2]); kfree(buf); - pr_info("cached dev %s is running already", + pr_info("cached dev %s is running already\n", dc->backing_dev_name); return -EBUSY; } @@ -1001,16 +1004,14 @@ int bch_cached_dev_run(struct cached_dev *dc) if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || sysfs_create_link(&disk_to_dev(d->disk)->kobj, &d->kobj, "bcache")) { - pr_err("Couldn't create bcache dev <-> disk sysfs symlinks"); + pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); return -ENOMEM; } dc->status_update_thread = kthread_run(cached_dev_status_update, dc, "bcache_status_update"); if (IS_ERR(dc->status_update_thread)) { - pr_warn("failed to create bcache_status_update kthread, " - "continue to run without monitoring backing " - "device status"); + pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n"); } return 0; @@ -1036,7 +1037,7 @@ static void cancel_writeback_rate_update_dwork(struct cached_dev *dc) } while (time_out > 0); if (time_out == 0) - pr_warn("give up waiting for dc->writeback_write_update to quit"); + pr_warn("give up waiting for dc->writeback_write_update to quit\n"); cancel_delayed_work_sync(&dc->writeback_rate_update); } @@ -1077,7 +1078,7 @@ static void cached_dev_detach_finish(struct work_struct *w) mutex_unlock(&bch_register_lock); - pr_info("Caching disabled for %s", dc->backing_dev_name); + pr_info("Caching disabled for %s\n", dc->backing_dev_name); /* Drop ref we took in cached_dev_detach() */ closure_put(&dc->disk.cl); @@ -1117,20 +1118,20 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, return -ENOENT; if (dc->disk.c) { - pr_err("Can't attach %s: already attached", + pr_err("Can't attach %s: already attached\n", dc->backing_dev_name); return -EINVAL; } if (test_bit(CACHE_SET_STOPPING, &c->flags)) { - pr_err("Can't attach %s: shutting down", + pr_err("Can't attach %s: shutting down\n", dc->backing_dev_name); return -EINVAL; } if (dc->sb.block_size < c->sb.block_size) { /* Will die */ - pr_err("Couldn't attach %s: block size less than set's block size", + pr_err("Couldn't attach %s: block size less than set's block size\n", dc->backing_dev_name); return -EINVAL; } @@ -1138,7 +1139,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Check whether already attached */ list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { - pr_err("Tried to attach %s but duplicate UUID already attached", + pr_err("Tried to attach %s but duplicate UUID already attached\n", dc->backing_dev_name); return -EINVAL; @@ -1157,14 +1158,14 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, if (!u) { if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { - pr_err("Couldn't find uuid for %s in set", + pr_err("Couldn't find uuid for %s in set\n", dc->backing_dev_name); return -ENOENT; } u = uuid_find_empty(c); if (!u) { - pr_err("Not caching %s, no room for UUID", + pr_err("Not caching %s, no room for UUID\n", dc->backing_dev_name); return -EINVAL; } @@ -1210,7 +1211,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, down_write(&dc->writeback_lock); if (bch_cached_dev_writeback_start(dc)) { up_write(&dc->writeback_lock); - pr_err("Couldn't start writeback facilities for %s", + pr_err("Couldn't start writeback facilities for %s\n", dc->disk.disk->disk_name); return -ENOMEM; } @@ -1233,7 +1234,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, */ kthread_stop(dc->writeback_thread); cancel_writeback_rate_update_dwork(dc); - pr_err("Couldn't run cached device %s", + pr_err("Couldn't run cached device %s\n", dc->backing_dev_name); return ret; } @@ -1244,7 +1245,7 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c, /* Allow the writeback thread to proceed */ up_write(&dc->writeback_lock); - pr_info("Caching %s as %s on set %pU", + pr_info("Caching %s as %s on set %pU\n", dc->backing_dev_name, dc->disk.disk->disk_name, dc->disk.c->sb.set_uuid); @@ -1384,7 +1385,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) goto err; - pr_info("registered backing device %s", dc->backing_dev_name); + pr_info("registered backing device %s\n", dc->backing_dev_name); list_add(&dc->list, &uncached_devices); /* attach to a matched cache set if it exists */ @@ -1401,7 +1402,7 @@ static int register_bdev(struct cache_sb *sb, struct cache_sb_disk *sb_disk, return 0; err: - pr_notice("error %s: %s", dc->backing_dev_name, err); + pr_notice("error %s: %s\n", dc->backing_dev_name, err); bcache_device_stop(&dc->disk); return ret; } @@ -1497,7 +1498,7 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size) u = uuid_find_empty(c); if (!u) { - pr_err("Can't create volume, no room for UUID"); + pr_err("Can't create volume, no room for UUID\n"); return -EINVAL; } @@ -1523,7 +1524,7 @@ bool bch_cached_dev_error(struct cached_dev *dc) smp_mb(); pr_err("stop %s: too many IO errors on backing device %s\n", - dc->disk.disk->disk_name, dc->backing_dev_name); + dc->disk.disk->disk_name, dc->backing_dev_name); bcache_device_stop(&dc->disk); return true; @@ -1534,6 +1535,7 @@ bool bch_cached_dev_error(struct cached_dev *dc) __printf(2, 3) bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) { + struct va_format vaf; va_list args; if (c->on_error != ON_ERROR_PANIC && @@ -1541,20 +1543,22 @@ bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) return false; if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_info("CACHE_SET_IO_DISABLE already set"); + pr_info("CACHE_SET_IO_DISABLE already set\n"); /* * XXX: we can be called from atomic context * acquire_console_sem(); */ - pr_err("bcache: error on %pU: ", c->sb.set_uuid); - va_start(args, fmt); - vprintk(fmt, args); - va_end(args); - pr_err(", disabling caching\n"); + vaf.fmt = fmt; + vaf.va = &args; + + pr_err("error on %pU: %pV, disabling caching\n", + c->sb.set_uuid, &vaf); + + va_end(args); if (c->on_error == ON_ERROR_PANIC) panic("panic forced after error\n"); @@ -1606,7 +1610,7 @@ static void cache_set_free(struct closure *cl) list_del(&c->list); mutex_unlock(&bch_register_lock); - pr_info("Cache set %pU unregistered", c->sb.set_uuid); + pr_info("Cache set %pU unregistered\n", c->sb.set_uuid); wake_up(&unregister_wait); closure_debug_destroy(&c->cl); @@ -1677,7 +1681,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, struct cached_dev *dc) { if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { - pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.", + pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", d->disk->disk_name, c->sb.set_uuid); bcache_device_stop(d); } else if (atomic_read(&dc->has_dirty)) { @@ -1685,7 +1689,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO * and dc->has_dirty == 1 */ - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.\n", d->disk->disk_name); /* * There might be a small time gap that cache set is @@ -1707,7 +1711,7 @@ static void conditional_stop_bcache_device(struct cache_set *c, * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO * and dc->has_dirty == 0 */ - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.", + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n", d->disk->disk_name); } } @@ -1874,7 +1878,7 @@ static int run_cache_set(struct cache_set *c) if (bch_journal_read(c, &journal)) goto err; - pr_debug("btree_journal_read() done"); + pr_debug("btree_journal_read() done\n"); err = "no journal entries found"; if (list_empty(&journal)) @@ -1920,7 +1924,7 @@ static int run_cache_set(struct cache_set *c) bch_journal_mark(c, &journal); bch_initial_gc_finish(c); - pr_debug("btree_check() done"); + pr_debug("btree_check() done\n"); /* * bcache_journal_next() can't happen sooner, or @@ -1951,7 +1955,7 @@ static int run_cache_set(struct cache_set *c) if (bch_journal_replay(c, &journal)) goto err; } else { - pr_notice("invalidating existing data"); + pr_notice("invalidating existing data\n"); for_each_cache(ca, c, i) { unsigned int j; @@ -2085,7 +2089,7 @@ found: memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); c->sb.flags = ca->sb.flags; c->sb.seq = ca->sb.seq; - pr_debug("set version = %llu", c->sb.version); + pr_debug("set version = %llu\n", c->sb.version); } kobject_get(&ca->kobj); @@ -2247,7 +2251,7 @@ err_btree_alloc: err_free: module_put(THIS_MODULE); if (err) - pr_notice("error %s: %s", ca->cache_dev_name, err); + pr_notice("error %s: %s\n", ca->cache_dev_name, err); return ret; } @@ -2301,14 +2305,14 @@ static int register_cache(struct cache_sb *sb, struct cache_sb_disk *sb_disk, goto out; } - pr_info("registered cache device %s", ca->cache_dev_name); + pr_info("registered cache device %s\n", ca->cache_dev_name); out: kobject_put(&ca->kobj); err: if (err) - pr_notice("error %s: %s", ca->cache_dev_name, err); + pr_notice("error %s: %s\n", ca->cache_dev_name, err); return ret; } @@ -2323,6 +2327,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, kobj_attribute_write(register, register_bcache); kobj_attribute_write(register_quiet, register_bcache); +kobj_attribute_write(register_async, register_bcache); kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); static bool bch_is_open_backing(struct block_device *bdev) @@ -2358,6 +2363,83 @@ static bool bch_is_open(struct block_device *bdev) return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); } +struct async_reg_args { + struct work_struct reg_work; + char *path; + struct cache_sb *sb; + struct cache_sb_disk *sb_disk; + struct block_device *bdev; +}; + +static void register_bdev_worker(struct work_struct *work) +{ + int fail = false; + struct async_reg_args *args = + container_of(work, struct async_reg_args, reg_work); + struct cached_dev *dc; + + dc = kzalloc(sizeof(*dc), GFP_KERNEL); + if (!dc) { + fail = true; + put_page(virt_to_page(args->sb_disk)); + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + goto out; + } + + mutex_lock(&bch_register_lock); + if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) + fail = true; + mutex_unlock(&bch_register_lock); + +out: + if (fail) + pr_info("error %s: fail to register backing device\n", + args->path); + kfree(args->sb); + kfree(args->path); + kfree(args); + module_put(THIS_MODULE); +} + +static void register_cache_worker(struct work_struct *work) +{ + int fail = false; + struct async_reg_args *args = + container_of(work, struct async_reg_args, reg_work); + struct cache *ca; + + ca = kzalloc(sizeof(*ca), GFP_KERNEL); + if (!ca) { + fail = true; + put_page(virt_to_page(args->sb_disk)); + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + goto out; + } + + /* blkdev_put() will be called in bch_cache_release() */ + if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) + fail = true; + +out: + if (fail) + pr_info("error %s: fail to register cache device\n", + args->path); + kfree(args->sb); + kfree(args->path); + kfree(args); + module_put(THIS_MODULE); +} + +static void register_device_aync(struct async_reg_args *args) +{ + if (SB_IS_BDEV(args->sb)) + INIT_WORK(&args->reg_work, register_bdev_worker); + else + INIT_WORK(&args->reg_work, register_cache_worker); + + queue_work(system_wq, &args->reg_work); +} + static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, const char *buffer, size_t size) { @@ -2420,6 +2502,26 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, goto out_blkdev_put; err = "failed to register device"; + if (attr == &ksysfs_register_async) { + /* register in asynchronous way */ + struct async_reg_args *args = + kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); + + if (!args) { + ret = -ENOMEM; + err = "cannot allocate memory"; + goto out_put_sb_page; + } + + args->path = path; + args->sb = sb; + args->sb_disk = sb_disk; + args->bdev = bdev; + register_device_aync(args); + /* No wait and returns to user space */ + goto async_done; + } + if (SB_IS_BDEV(sb)) { struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); @@ -2447,6 +2549,7 @@ done: kfree(sb); kfree(path); module_put(THIS_MODULE); +async_done: return size; out_put_sb_page: @@ -2461,7 +2564,7 @@ out_free_path: out_module_put: module_put(THIS_MODULE); out: - pr_info("error %s: %s", path?path:"", err); + pr_info("error %s: %s\n", path?path:"", err); return ret; } @@ -2506,7 +2609,7 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k, mutex_unlock(&bch_register_lock); list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { - pr_info("delete pdev %p", pdev); + pr_info("delete pdev %p\n", pdev); list_del(&pdev->list); bcache_device_stop(&pdev->dc->disk); kfree(pdev); @@ -2549,7 +2652,7 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) mutex_unlock(&bch_register_lock); - pr_info("Stopping all devices:"); + pr_info("Stopping all devices:\n"); /* * The reason bch_register_lock is not held to call @@ -2599,9 +2702,9 @@ static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x) finish_wait(&unregister_wait, &wait); if (stopped) - pr_info("All devices stopped"); + pr_info("All devices stopped\n"); else - pr_notice("Timeout waiting for devices to be closed"); + pr_notice("Timeout waiting for devices to be closed\n"); out: mutex_unlock(&bch_register_lock); } @@ -2637,7 +2740,7 @@ static void check_module_parameters(void) if (bch_cutoff_writeback_sync == 0) bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC; else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) { - pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u", + pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n", bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX); bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX; } @@ -2645,13 +2748,13 @@ static void check_module_parameters(void) if (bch_cutoff_writeback == 0) bch_cutoff_writeback = CUTOFF_WRITEBACK; else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) { - pr_warn("set bch_cutoff_writeback (%u) to max value %u", + pr_warn("set bch_cutoff_writeback (%u) to max value %u\n", bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX); bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX; } if (bch_cutoff_writeback > bch_cutoff_writeback_sync) { - pr_warn("set bch_cutoff_writeback (%u) to %u", + pr_warn("set bch_cutoff_writeback (%u) to %u\n", bch_cutoff_writeback, bch_cutoff_writeback_sync); bch_cutoff_writeback = bch_cutoff_writeback_sync; } @@ -2662,6 +2765,9 @@ static int __init bcache_init(void) static const struct attribute *files[] = { &ksysfs_register.attr, &ksysfs_register_quiet.attr, +#ifdef CONFIG_BCACHE_ASYNC_REGISTRAION + &ksysfs_register_async.attr, +#endif &ksysfs_pendings_cleanup.attr, NULL }; diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index 323276994aab..0dadec5a78f6 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -421,7 +421,7 @@ STORE(__cached_dev) return size; } if (v == -ENOENT) - pr_err("Can't attach %s: cache set not found", buf); + pr_err("Can't attach %s: cache set not found\n", buf); return v; } @@ -455,7 +455,7 @@ STORE(bch_cached_dev) */ if (dc->writeback_running) { dc->writeback_running = false; - pr_err("%s: failed to run non-existent writeback thread", + pr_err("%s: failed to run non-existent writeback thread\n", dc->disk.disk->disk_name); } } else @@ -872,11 +872,11 @@ STORE(__bch_cache_set) if (v) { if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already set"); + pr_warn("CACHE_SET_IO_DISABLE already set\n"); } else { if (!test_and_clear_bit(CACHE_SET_IO_DISABLE, &c->flags)) - pr_warn("CACHE_SET_IO_DISABLE already cleared"); + pr_warn("CACHE_SET_IO_DISABLE already cleared\n"); } } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 3f7641fb28d5..1cf1e5016cb9 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -809,7 +809,7 @@ static int bch_root_node_dirty_init(struct cache_set *c, schedule_timeout_interruptible( msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); else if (ret < 0) { - pr_warn("sectors dirty init failed, ret=%d!", ret); + pr_warn("sectors dirty init failed, ret=%d!\n", ret); break; } } while (ret == -EAGAIN); @@ -917,7 +917,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); if (!state) { - pr_warn("sectors dirty init failed: cannot allocate memory"); + pr_warn("sectors dirty init failed: cannot allocate memory\n"); return; } @@ -945,7 +945,7 @@ void bch_sectors_dirty_init(struct bcache_device *d) &state->infos[i], name); if (IS_ERR(state->infos[i].thread)) { - pr_err("fails to run thread bch_dirty_init[%d]", i); + pr_err("fails to run thread bch_dirty_init[%d]\n", i); for (--i; i >= 0; i--) kthread_stop(state->infos[i].thread); goto out; diff --git a/drivers/md/md-linear.h b/drivers/md/md-linear.h index 8381d651d4ed..24e97db50ebb 100644 --- a/drivers/md/md-linear.h +++ b/drivers/md/md-linear.h @@ -12,6 +12,6 @@ struct linear_conf struct rcu_head rcu; sector_t array_sectors; int raid_disks; /* a copy of mddev->raid_disks */ - struct dev_info disks[0]; + struct dev_info disks[]; }; #endif diff --git a/drivers/md/md.c b/drivers/md/md.c index 271e8a587354..f567f536b529 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -89,6 +89,7 @@ static struct module *md_cluster_mod; static DECLARE_WAIT_QUEUE_HEAD(resync_wait); static struct workqueue_struct *md_wq; static struct workqueue_struct *md_misc_wq; +static struct workqueue_struct *md_rdev_misc_wq; static int remove_and_add_spares(struct mddev *mddev, struct md_rdev *this); @@ -227,13 +228,13 @@ void mddev_create_serial_pool(struct mddev *mddev, struct md_rdev *rdev, goto abort; if (mddev->serial_info_pool == NULL) { - unsigned int noio_flag; - - noio_flag = memalloc_noio_save(); + /* + * already in memalloc noio context by + * mddev_suspend() + */ mddev->serial_info_pool = mempool_create_kmalloc_pool(NR_SERIAL_INFOS, sizeof(struct serial_info)); - memalloc_noio_restore(noio_flag); if (!mddev->serial_info_pool) { rdevs_uninit_serial(mddev); pr_err("can't alloc memory pool for serialization\n"); @@ -466,7 +467,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) { const int rw = bio_data_dir(bio); const int sgrp = op_stat_group(bio_op(bio)); - struct mddev *mddev = q->queuedata; + struct mddev *mddev = bio->bi_disk->private_data; unsigned int sectors; if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { @@ -527,11 +528,15 @@ void mddev_suspend(struct mddev *mddev) wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); del_timer_sync(&mddev->safemode_timer); + /* restrict memory reclaim I/O during raid array is suspend */ + mddev->noio_flag = memalloc_noio_save(); } EXPORT_SYMBOL_GPL(mddev_suspend); void mddev_resume(struct mddev *mddev) { + /* entred the memalloc scope from mddev_suspend() */ + memalloc_noio_restore(mddev->noio_flag); lockdep_assert_held(&mddev->reconfig_mutex); if (--mddev->suspended) return; @@ -2454,7 +2459,7 @@ static int bind_rdev_to_array(struct md_rdev *rdev, struct mddev *mddev) return err; } -static void md_delayed_delete(struct work_struct *ws) +static void rdev_delayed_delete(struct work_struct *ws) { struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work); kobject_del(&rdev->kobj); @@ -2479,9 +2484,9 @@ static void unbind_rdev_from_array(struct md_rdev *rdev) * to delay it due to rcu usage. */ synchronize_rcu(); - INIT_WORK(&rdev->del_work, md_delayed_delete); + INIT_WORK(&rdev->del_work, rdev_delayed_delete); kobject_get(&rdev->kobj); - queue_work(md_misc_wq, &rdev->del_work); + queue_work(md_rdev_misc_wq, &rdev->del_work); } /* @@ -3191,8 +3196,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) rdev->saved_raid_disk = -1; clear_bit(In_sync, &rdev->flags); clear_bit(Bitmap_sync, &rdev->flags); - err = rdev->mddev->pers-> - hot_add_disk(rdev->mddev, rdev); + err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev); if (err) { rdev->raid_disk = -1; return err; @@ -4514,6 +4518,20 @@ null_show(struct mddev *mddev, char *page) return -EINVAL; } +/* need to ensure rdev_delayed_delete() has completed */ +static void flush_rdev_wq(struct mddev *mddev) +{ + struct md_rdev *rdev; + + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) + if (work_pending(&rdev->del_work)) { + flush_workqueue(md_rdev_misc_wq); + break; + } + rcu_read_unlock(); +} + static ssize_t new_dev_store(struct mddev *mddev, const char *buf, size_t len) { @@ -4541,8 +4559,7 @@ new_dev_store(struct mddev *mddev, const char *buf, size_t len) minor != MINOR(dev)) return -EOVERFLOW; - flush_workqueue(md_misc_wq); - + flush_rdev_wq(mddev); err = mddev_lock(mddev); if (err) return err; @@ -4780,7 +4797,8 @@ action_store(struct mddev *mddev, const char *page, size_t len) clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && mddev_lock(mddev) == 0) { - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -5626,7 +5644,6 @@ static int md_alloc(dev_t dev, char *name) mddev->queue = blk_alloc_queue(md_make_request, NUMA_NO_NODE); if (!mddev->queue) goto abort; - mddev->queue->queuedata = mddev; blk_set_stacking_limits(&mddev->queue->limits); @@ -6147,7 +6164,8 @@ static void md_clean(struct mddev *mddev) static void __md_stop_writes(struct mddev *mddev) { set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); @@ -6200,7 +6218,8 @@ static void __md_stop(struct mddev *mddev) md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ - flush_workqueue(md_misc_wq); + if (mddev->event_work.func) + flush_workqueue(md_misc_wq); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); @@ -7495,9 +7514,8 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, } - if (cmd == ADD_NEW_DISK) - /* need to ensure md_delayed_delete() has completed */ - flush_workqueue(md_misc_wq); + if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK) + flush_rdev_wq(mddev); if (cmd == HOT_REMOVE_DISK) /* need to ensure recovery thread has run */ @@ -7752,7 +7770,8 @@ static int md_open(struct block_device *bdev, fmode_t mode) */ mddev_put(mddev); /* Wait until bdev->bd_disk is definitely gone */ - flush_workqueue(md_misc_wq); + if (work_pending(&mddev->del_work)) + flush_workqueue(md_misc_wq); /* Then retry the open from the top */ return -ERESTARTSYS; } @@ -9040,8 +9059,7 @@ static int remove_and_add_spares(struct mddev *mddev, rdev->recovery_offset = 0; } - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { + if (mddev->pers->hot_add_disk(mddev, rdev) == 0) { if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; if (!test_bit(Journal, &rdev->flags)) @@ -9469,6 +9487,10 @@ static int __init md_init(void) if (!md_misc_wq) goto err_misc_wq; + md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0); + if (!md_misc_wq) + goto err_rdev_misc_wq; + if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) goto err_md; @@ -9490,6 +9512,8 @@ static int __init md_init(void) err_mdp: unregister_blkdev(MD_MAJOR, "md"); err_md: + destroy_workqueue(md_rdev_misc_wq); +err_rdev_misc_wq: destroy_workqueue(md_misc_wq); err_misc_wq: destroy_workqueue(md_wq); @@ -9776,6 +9800,7 @@ static __exit void md_exit(void) * destroy_workqueue() below will wait for that to complete. */ } + destroy_workqueue(md_rdev_misc_wq); destroy_workqueue(md_misc_wq); destroy_workqueue(md_wq); } @@ -9785,7 +9810,7 @@ module_exit(md_exit) static int get_ro(char *buffer, const struct kernel_param *kp) { - return sprintf(buffer, "%d", start_readonly); + return sprintf(buffer, "%d\n", start_readonly); } static int set_ro(const char *val, const struct kernel_param *kp) { diff --git a/drivers/md/md.h b/drivers/md/md.h index acd681939112..612814d07d35 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -497,6 +497,7 @@ struct mddev { void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); struct md_cluster_info *cluster_info; unsigned int good_device_nr; /* good device num within cluster raid */ + unsigned int noio_flag; /* for memalloc scope API */ bool has_superblocks:1; bool fail_last_dev:1; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index cd810e195086..dcd27f3da84e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -296,22 +296,17 @@ static void reschedule_retry(struct r1bio *r1_bio) static void call_bio_endio(struct r1bio *r1_bio) { struct bio *bio = r1_bio->master_bio; - struct r1conf *conf = r1_bio->mddev->private; if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) bio->bi_status = BLK_STS_IOERR; bio_endio(bio); - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf, r1_bio->sector); } static void raid_end_bio_io(struct r1bio *r1_bio) { struct bio *bio = r1_bio->master_bio; + struct r1conf *conf = r1_bio->mddev->private; /* if nobody has done the final endio yet, do it now */ if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { @@ -322,6 +317,12 @@ static void raid_end_bio_io(struct r1bio *r1_bio) call_bio_endio(r1_bio); } + /* + * Wake up any possible resync thread that waits for the device + * to go idle. All I/Os, even write-behind writes, are done. + */ + allow_barrier(conf, r1_bio->sector); + free_r1bio(r1_bio); } diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e7ccad898736..b7eb09e8c025 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -180,7 +180,7 @@ struct r1bio { * if the IO is in WRITE direction, then multiple bios are used. * We choose the number when they are allocated. */ - struct bio *bios[0]; + struct bio *bios[]; /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ }; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index d3eaaf3eb1bc..79cd2b7d3128 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -153,7 +153,7 @@ struct r10bio { }; sector_t addr; int devnum; - } devs[0]; + } devs[]; }; /* bits for r10bio.state */ diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index ba00e9877f02..ab8067f9ce8c 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2215,10 +2215,13 @@ static int grow_stripes(struct r5conf *conf, int num) } /** - * scribble_len - return the required size of the scribble region + * scribble_alloc - allocate percpu scribble buffer for required size + * of the scribble region + * @percpu - from for_each_present_cpu() of the caller * @num - total number of disks in the array + * @cnt - scribble objs count for required size of the scribble region * - * The size must be enough to contain: + * The scribble buffer size must be enough to contain: * 1/ a struct page pointer for each device in the array +2 * 2/ room to convert each entry in (1) to its corresponding dma * (dma_map_page()) or page (page_address()) address. @@ -2228,14 +2231,19 @@ static int grow_stripes(struct r5conf *conf, int num) * of the P and Q blocks. */ static int scribble_alloc(struct raid5_percpu *percpu, - int num, int cnt, gfp_t flags) + int num, int cnt) { size_t obj_size = sizeof(struct page *) * (num+2) + sizeof(addr_conv_t) * (num+2); void *scribble; - scribble = kvmalloc_array(cnt, obj_size, flags); + /* + * If here is in raid array suspend context, it is in memalloc noio + * context as well, there is no potential recursive memory reclaim + * I/Os with the GFP_KERNEL flag. + */ + scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL); if (!scribble) return -ENOMEM; @@ -2267,8 +2275,7 @@ static int resize_chunks(struct r5conf *conf, int new_disks, int new_sectors) percpu = per_cpu_ptr(conf->percpu, cpu); err = scribble_alloc(percpu, new_disks, - new_sectors / STRIPE_SECTORS, - GFP_NOIO); + new_sectors / STRIPE_SECTORS); if (err) break; } @@ -6759,8 +6766,7 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu conf->previous_raid_disks), max(conf->chunk_sectors, conf->prev_chunk_sectors) - / STRIPE_SECTORS, - GFP_KERNEL)) { + / STRIPE_SECTORS)) { free_scratch_buffer(conf, percpu); return -ENOMEM; } diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cbf171636766..0585efa47d8f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -19,7 +19,6 @@ #include <linux/pr.h> #include <linux/ptrace.h> #include <linux/nvme_ioctl.h> -#include <linux/t10-pi.h> #include <linux/pm_qos.h> #include <asm/unaligned.h> @@ -204,11 +203,6 @@ static void nvme_delete_ctrl_sync(struct nvme_ctrl *ctrl) nvme_put_ctrl(ctrl); } -static inline bool nvme_ns_has_pi(struct nvme_ns *ns) -{ - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); -} - static blk_status_t nvme_error_status(u16 status) { switch (status & 0x7ff) { @@ -433,7 +427,6 @@ static void nvme_free_ns_head(struct kref *ref) nvme_mpath_remove_disk(head); ida_simple_remove(&head->subsys->ns_ida, head->instance); - list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); nvme_put_subsystem(head->subsys); kfree(head); @@ -530,7 +523,7 @@ static int nvme_get_stream_params(struct nvme_ctrl *ctrl, c.directive.opcode = nvme_admin_directive_recv; c.directive.nsid = cpu_to_le32(nsid); - c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); + c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s))); c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; c.directive.dtype = NVME_DIR_STREAMS; @@ -553,19 +546,22 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) - return ret; + goto out_disable_stream; ctrl->nssa = le16_to_cpu(s.nssa); if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { dev_info(ctrl->device, "too few streams (%u) available\n", ctrl->nssa); - nvme_disable_streams(ctrl); - return 0; + goto out_disable_stream; } ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); return 0; + +out_disable_stream: + nvme_disable_streams(ctrl); + return ret; } /* @@ -1027,6 +1023,19 @@ void nvme_stop_keep_alive(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); +/* + * In NVMe 1.0 the CNS field was just a binary controller or namespace + * flag, thus sending any new CNS opcodes has a big chance of not working. + * Qemu unfortunately had that bug after reporting a 1.1 version compliance + * (but not for any later version). + */ +static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +{ + if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) + return ctrl->vs < NVME_VS(1, 2, 0); + return ctrl->vs < NVME_VS(1, 1, 0); +} + static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) { struct nvme_command c = { }; @@ -1290,7 +1299,7 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) meta_len = (io.nblocks + 1) * ns->ms; metadata = nvme_to_user_ptr(io.metadata); - if (ns->ext) { + if (ns->features & NVME_NS_EXT_LBAS) { length += meta_len; meta_len = 0; } else if (meta_len) { @@ -1392,8 +1401,10 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) } if (effects & NVME_CMD_EFFECTS_CCC) nvme_init_identify(ctrl); - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { nvme_queue_scan(ctrl); + flush_work(&ctrl->scan_work); + } } static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, @@ -1682,7 +1693,8 @@ static int nvme_getgeo(struct block_device *bdev, struct hd_geometry *geo) } #ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { struct blk_integrity integrity; @@ -1705,20 +1717,15 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) } integrity.tuple_size = ms; blk_integrity_register(disk, &integrity); - blk_queue_max_integrity_segments(disk->queue, 1); + blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); } #else -static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) +static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, + u32 max_integrity_segments) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_set_chunk_size(struct nvme_ns *ns) -{ - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); -} - static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) { struct nvme_ctrl *ctrl = ns->ctrl; @@ -1804,12 +1811,37 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; } +static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u32 *phys_bs, u32 *io_opt) +{ + struct streams_directive_params s; + int ret; + + if (!ctrl->nr_streams) + return 0; + + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); + if (ret) + return ret; + + ns->sws = le32_to_cpu(s.sws); + ns->sgs = le16_to_cpu(s.sgs); + + if (ns->sws) { + *phys_bs = ns->sws * (1 << ns->lba_shift); + if (ns->sgs) + *io_opt = *phys_bs * ns->sgs; + } + + return 0; +} + static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; - u32 atomic_bs, phys_bs, io_opt; + u32 atomic_bs, phys_bs, io_opt = 0; if (ns->lba_shift > PAGE_SHIFT) { /* unsupported block size, set capacity to 0 later */ @@ -1818,26 +1850,25 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); + atomic_bs = phys_bs = bs; + nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); if (id->nabo == 0) { /* * Bit 1 indicates whether NAWUPF is defined for this namespace * and whether it should be used instead of AWUPF. If NAWUPF == * 0 then AWUPF must be used instead. */ - if (id->nsfeat & (1 << 1) && id->nawupf) + if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; else atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; - } else { - atomic_bs = bs; } - phys_bs = bs; - io_opt = bs; - if (id->nsfeat & (1 << 4)) { + + if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { /* NPWG = Namespace Preferred Write Granularity */ - phys_bs *= 1 + le16_to_cpu(id->npwg); + phys_bs = bs * (1 + le16_to_cpu(id->npwg)); /* NOWS = Namespace Optimal Write Size */ - io_opt *= 1 + le16_to_cpu(id->nows); + io_opt = bs * (1 + le16_to_cpu(id->nows)); } blk_queue_logical_block_size(disk->queue, bs); @@ -1850,19 +1881,34 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_queue_io_min(disk->queue, phys_bs); blk_queue_io_opt(disk->queue, io_opt); - if (ns->ms && !ns->ext && - (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) - nvme_init_integrity(disk, ns->ms, ns->pi_type); - if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || - ns->lba_shift > PAGE_SHIFT) + /* + * The block layer can't support LBA sizes larger than the page size + * yet, so catch this early and don't allow block I/O. + */ + if (ns->lba_shift > PAGE_SHIFT) capacity = 0; + /* + * Register a metadata profile for PI, or the plain non-integrity NVMe + * metadata masquerading as Type 0 if supported, otherwise reject block + * I/O to namespaces with metadata except when the namespace supports + * PI, as it can strip/insert in that case. + */ + if (ns->ms) { + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + (ns->features & NVME_NS_METADATA_SUPPORTED)) + nvme_init_integrity(disk, ns->ms, ns->pi_type, + ns->ctrl->max_integrity_segments); + else if (!nvme_ns_has_pi(ns)) + capacity = 0; + } + set_capacity_revalidate_and_notify(disk, capacity, false); nvme_config_discard(disk, ns); nvme_config_write_zeroes(disk, ns); - if (id->nsattr & (1 << 0)) + if (id->nsattr & NVME_NS_ATTR_RO) set_disk_ro(disk, true); else set_disk_ro(disk, false); @@ -1870,9 +1916,11 @@ static void nvme_update_disk_info(struct gendisk *disk, blk_mq_unfreeze_queue(disk->queue); } -static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) +static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) { struct nvme_ns *ns = disk->private_data; + struct nvme_ctrl *ctrl = ns->ctrl; + u32 iob; /* * If identify namespace failed, use default 512 byte block size so @@ -1881,32 +1929,55 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; if (ns->lba_shift == 0) ns->lba_shift = 9; - ns->noiob = le16_to_cpu(id->noiob); + + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && + is_power_of_2(ctrl->max_hw_sectors)) + iob = ctrl->max_hw_sectors; + else + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); + + ns->features = 0; ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); /* the PI implementation requires metadata equal t10 pi tuple size */ if (ns->ms == sizeof(struct t10_pi_tuple)) ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; else ns->pi_type = 0; - if (ns->noiob) - nvme_set_chunk_size(ns); + if (ns->ms) { + /* + * For PCIe only the separate metadata pointer is supported, + * as the block layer supplies metadata in a separate bio_vec + * chain. For Fabrics, only metadata as part of extended data + * LBA is supported on the wire per the Fabrics specification, + * but the HBA/HCA will do the remapping from the separate + * metadata buffers for us. + */ + if (id->flbas & NVME_NS_FLBAS_META_EXT) { + ns->features |= NVME_NS_EXT_LBAS; + if ((ctrl->ops->flags & NVME_F_FABRICS) && + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && + ctrl->max_integrity_segments) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } else { + if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) + return -EINVAL; + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) + ns->features |= NVME_NS_METADATA_SUPPORTED; + } + } + + if (iob) + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); nvme_update_disk_info(disk, ns, id); #ifdef CONFIG_NVME_MULTIPATH if (ns->head->disk) { nvme_update_disk_info(ns->head->disk, ns, id); blk_queue_stack_limits(ns->head->disk->queue, ns->queue); - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { - struct backing_dev_info *info = - ns->head->disk->queue->backing_dev_info; - - info->capabilities |= BDI_CAP_STABLE_WRITES; - } - revalidate_disk(ns->head->disk); } #endif + return 0; } static int nvme_revalidate_disk(struct gendisk *disk) @@ -1931,7 +2002,6 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto free_id; } - __nvme_revalidate_disk(disk, id); ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); if (ret) goto free_id; @@ -1940,8 +2010,10 @@ static int nvme_revalidate_disk(struct gendisk *disk) dev_err(ctrl->device, "identifiers changed for nsid %d\n", ns->head->ns_id); ret = -ENODEV; + goto free_id; } + ret = __nvme_revalidate_disk(disk, id); free_id: kfree(id); out: @@ -2249,10 +2321,8 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && - is_power_of_2(ctrl->max_hw_sectors)) - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); blk_queue_virt_boundary(q, ctrl->page_size - 1); + blk_queue_dma_alignment(q, 7); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; blk_queue_write_cache(q, vwc, vwc); @@ -2655,7 +2725,7 @@ static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, return false; } - if ((id->cmic & (1 << 1)) || + if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || (ctrl->opts && ctrl->opts->discovery_nqn)) continue; @@ -2746,7 +2816,7 @@ int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp, void *log, size_t size, u64 offset) { struct nvme_command c = { }; - unsigned long dwlen = size / 4 - 1; + u32 dwlen = nvme_bytes_to_numd(size); c.get_log_page.opcode = nvme_admin_get_log_page; c.get_log_page.nsid = cpu_to_le32(nsid); @@ -3401,7 +3471,6 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, list_for_each_entry(h, &subsys->nsheads, entry) { if (nvme_ns_ids_valid(&new->ids) && - !list_empty(&h->list) && nvme_ns_ids_equal(&new->ids, &h->ids)) return -EINVAL; } @@ -3410,8 +3479,7 @@ static int __nvme_check_ids(struct nvme_subsystem *subsys, } static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, - unsigned nsid, struct nvme_id_ns *id, - struct nvme_ns_ids *ids) + unsigned nsid, struct nvme_ns_ids *ids) { struct nvme_ns_head *head; size_t size = sizeof(*head); @@ -3469,42 +3537,51 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, struct nvme_id_ns *id) { struct nvme_ctrl *ctrl = ns->ctrl; - bool is_shared = id->nmic & (1 << 0); + bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; struct nvme_ns_head *head = NULL; struct nvme_ns_ids ids; int ret = 0; ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); - if (ret) - goto out; + if (ret) { + if (ret < 0) + return ret; + return blk_status_to_errno(nvme_error_status(ret)); + } mutex_lock(&ctrl->subsys->lock); - if (is_shared) - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl->subsys, nsid); if (!head) { - head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); + head = nvme_alloc_ns_head(ctrl, nsid, &ids); if (IS_ERR(head)) { ret = PTR_ERR(head); goto out_unlock; } + head->shared = is_shared; } else { + ret = -EINVAL; + if (!is_shared || !head->shared) { + dev_err(ctrl->device, + "Duplicate unshared namespace %d\n", nsid); + goto out_put_ns_head; + } if (!nvme_ns_ids_equal(&head->ids, &ids)) { dev_err(ctrl->device, "IDs don't match for shared namespace %d\n", nsid); - ret = -EINVAL; - goto out_unlock; + goto out_put_ns_head; } } list_add_tail(&ns->siblings, &head->list); ns->head = head; + mutex_unlock(&ctrl->subsys->lock); + return 0; +out_put_ns_head: + nvme_put_ns_head(head); out_unlock: mutex_unlock(&ctrl->subsys->lock); -out: - if (ret > 0) - ret = blk_status_to_errno(nvme_error_status(ret)); return ret; } @@ -3535,32 +3612,6 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) return ret; } -static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) -{ - struct streams_directive_params s; - int ret; - - if (!ctrl->nr_streams) - return 0; - - ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); - if (ret) - return ret; - - ns->sws = le32_to_cpu(s.sws); - ns->sgs = le16_to_cpu(s.sgs); - - if (ns->sws) { - unsigned int bs = 1 << ns->lba_shift; - - blk_queue_io_min(ns->queue, bs * ns->sws); - if (ns->sgs) - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); - } - - return 0; -} - static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3604,7 +3655,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ret = nvme_init_ns_head(ns, nsid, id); if (ret) goto out_free_id; - nvme_setup_streams_ns(ctrl, ns); nvme_set_disk_name(disk_name, ns, ctrl, &flags); disk = alloc_disk_node(0, node); @@ -3618,7 +3668,8 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); ns->disk = disk; - __nvme_revalidate_disk(disk, id); + if (__nvme_revalidate_disk(disk, id)) + goto out_free_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3645,9 +3696,13 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); + out_free_disk: + del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ctrl->subsys->lock); nvme_put_ns_head(ns->head); out_free_id: @@ -3667,7 +3722,10 @@ static void nvme_ns_remove(struct nvme_ns *ns) mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); + if (list_empty(&ns->head->list)) + list_del_init(&ns->head->entry); mutex_unlock(&ns->ctrl->subsys->lock); + synchronize_rcu(); /* guarantee not available in head->list */ nvme_mpath_clear_current_path(ns); synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ @@ -3687,6 +3745,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) nvme_put_ns(ns); } +static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) +{ + struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); + + if (ns) { + nvme_ns_remove(ns); + nvme_put_ns(ns); + } +} + static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns; @@ -3718,39 +3786,34 @@ static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl, } -static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) +static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) { - struct nvme_ns *ns; + const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32); __le32 *ns_list; - unsigned i, j, nsid, prev = 0; - unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); - int ret = 0; + u32 prev = 0; + int ret = 0, i; + + if (nvme_ctrl_limited_cns(ctrl)) + return -EOPNOTSUPP; ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); if (!ns_list) return -ENOMEM; - for (i = 0; i < num_lists; i++) { + for (;;) { ret = nvme_identify_ns_list(ctrl, prev, ns_list); if (ret) goto free; - for (j = 0; j < min(nn, 1024U); j++) { - nsid = le32_to_cpu(ns_list[j]); - if (!nsid) - goto out; + for (i = 0; i < nr_entries; i++) { + u32 nsid = le32_to_cpu(ns_list[i]); + if (!nsid) /* end of the list? */ + goto out; nvme_validate_ns(ctrl, nsid); - - while (++prev < nsid) { - ns = nvme_find_get_ns(ctrl, prev); - if (ns) { - nvme_ns_remove(ns); - nvme_put_ns(ns); - } - } + while (++prev < nsid) + nvme_ns_remove_by_nsid(ctrl, prev); } - nn -= j; } out: nvme_remove_invalid_namespaces(ctrl, prev); @@ -3759,9 +3822,15 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) return ret; } -static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) +static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) { - unsigned i; + struct nvme_id_ctrl *id; + u32 nn, i; + + if (nvme_identify_ctrl(ctrl, &id)) + return; + nn = le32_to_cpu(id->nn); + kfree(id); for (i = 1; i <= nn; i++) nvme_validate_ns(ctrl, i); @@ -3798,8 +3867,6 @@ static void nvme_scan_work(struct work_struct *work) { struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, scan_work); - struct nvme_id_ctrl *id; - unsigned nn; /* No tagset on a live ctrl means IO queues could not created */ if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) @@ -3810,20 +3877,11 @@ static void nvme_scan_work(struct work_struct *work) nvme_clear_changed_ns_log(ctrl); } - if (nvme_identify_ctrl(ctrl, &id)) - return; - mutex_lock(&ctrl->scan_lock); - nn = le32_to_cpu(id->nn); - if (ctrl->vs >= NVME_VS(1, 1, 0) && - !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { - if (!nvme_scan_ns_list(ctrl, nn)) - goto out_free_id; - } - nvme_scan_ns_sequential(ctrl, nn); -out_free_id: + if (nvme_scan_ns_list(ctrl) != 0) + nvme_scan_ns_sequential(ctrl); mutex_unlock(&ctrl->scan_lock); - kfree(id); + down_write(&ctrl->namespaces_rwsem); list_sort(NULL, &ctrl->namespaces, ns_cmp); up_write(&ctrl->namespaces_rwsem); diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7dfc4a2ecf1e..cb0007592c12 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -14,6 +14,7 @@ #include "fabrics.h" #include <linux/nvme-fc-driver.h> #include <linux/nvme-fc.h> +#include "fc.h" #include <scsi/scsi_transport_fc.h> /* *************************** Data Structures/Defines ****************** */ @@ -61,6 +62,17 @@ struct nvmefc_ls_req_op { bool req_queued; }; +struct nvmefc_ls_rcv_op { + struct nvme_fc_rport *rport; + struct nvmefc_ls_rsp *lsrsp; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; + u16 rqstdatalen; + bool handled; + dma_addr_t rspdma; + struct list_head lsrcv_list; /* rport->ls_rcv_list */ +} __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ + enum nvme_fcpop_state { FCPOP_STATE_UNINIT = 0, FCPOP_STATE_IDLE = 1, @@ -96,7 +108,7 @@ struct nvme_fc_fcp_op { struct nvme_fcp_op_w_sgl { struct nvme_fc_fcp_op op; struct scatterlist sgl[NVME_INLINE_SG_CNT]; - uint8_t priv[0]; + uint8_t priv[]; }; struct nvme_fc_lport { @@ -117,6 +129,7 @@ struct nvme_fc_rport { struct list_head endp_list; /* for lport->endp_list */ struct list_head ctrl_list; struct list_head ls_req_list; + struct list_head ls_rcv_list; struct list_head disc_list; struct device *dev; /* physical device for dma */ struct nvme_fc_lport *lport; @@ -124,11 +137,12 @@ struct nvme_fc_rport { struct kref ref; atomic_t act_ctrl_cnt; unsigned long dev_loss_end; + struct work_struct lsrcv_work; } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ -enum nvme_fcctrl_flags { - FCCTRL_TERMIO = (1 << 0), -}; +/* fc_ctrl flags values - specified as bit positions */ +#define ASSOC_ACTIVE 0 +#define FCCTRL_TERMIO 1 struct nvme_fc_ctrl { spinlock_t lock; @@ -139,9 +153,9 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - bool assoc_active; atomic_t err_work_active; u64 association_id; + struct nvmefc_ls_rcv_op *rcv_disconn; struct list_head ctrl_list; /* rport->ctrl_list */ @@ -152,7 +166,7 @@ struct nvme_fc_ctrl { struct work_struct err_work; struct kref ref; - u32 flags; + unsigned long flags; u32 iocnt; wait_queue_head_t ioabort_wait; @@ -219,6 +233,9 @@ static struct device *fc_udev_device; static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); + + static void nvme_fc_free_lport(struct kref *ref) { @@ -394,7 +411,10 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, newrec->ops = template; newrec->dev = dev; ida_init(&newrec->endp_cnt); - newrec->localport.private = &newrec[1]; + if (template->local_priv_sz) + newrec->localport.private = &newrec[1]; + else + newrec->localport.private = NULL; newrec->localport.node_name = pinfo->node_name; newrec->localport.port_name = pinfo->port_name; newrec->localport.port_role = pinfo->port_role; @@ -701,9 +721,13 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, atomic_set(&newrec->act_ctrl_cnt, 0); spin_lock_init(&newrec->lock); newrec->remoteport.localport = &lport->localport; + INIT_LIST_HEAD(&newrec->ls_rcv_list); newrec->dev = lport->dev; newrec->lport = lport; - newrec->remoteport.private = &newrec[1]; + if (lport->ops->remote_priv_sz) + newrec->remoteport.private = &newrec[1]; + else + newrec->remoteport.private = NULL; newrec->remoteport.port_role = pinfo->port_role; newrec->remoteport.node_name = pinfo->node_name; newrec->remoteport.port_name = pinfo->port_name; @@ -711,6 +735,7 @@ nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; newrec->remoteport.port_num = idx; __nvme_fc_set_dev_loss_tmo(newrec, pinfo); + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); spin_lock_irqsave(&nvme_fc_lock, flags); list_add_tail(&newrec->endp_list, &lport->endp_list); @@ -1000,6 +1025,7 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); +static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); static void __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) @@ -1140,41 +1166,6 @@ nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, return __nvme_fc_send_ls_req(rport, lsop, done); } -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_LSACC = 1, - VERR_LSDESC_RQST = 2, - VERR_LSDESC_RQST_LEN = 3, - VERR_ASSOC_ID = 4, - VERR_ASSOC_ID_LEN = 5, - VERR_CONN_ID = 6, - VERR_CONN_ID_LEN = 7, - VERR_CR_ASSOC = 8, - VERR_CR_ASSOC_ACC_LEN = 9, - VERR_CR_CONN = 10, - VERR_CR_CONN_ACC_LEN = 11, - VERR_DISCONN = 12, - VERR_DISCONN_ACC_LEN = 13, -}; - -static char *validation_errors[] = { - "OK", - "Not LS_ACC", - "Not LSDESC_RQST", - "Bad LSDESC_RQST Length", - "Not Association ID", - "Bad Association ID Length", - "Not Connection ID", - "Bad Connection ID Length", - "Not CR_ASSOC Rqst", - "Bad CR_ASSOC ACC Length", - "Not CR_CONN Rqst", - "Bad CR_CONN ACC Length", - "Not Disconnect Rqst", - "Bad Disconnect ACC Length", -}; - static int nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) @@ -1183,21 +1174,27 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, struct nvmefc_ls_req *lsreq; struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; struct fcnvme_ls_cr_assoc_acc *assoc_acc; + unsigned long flags; int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Association failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1]; assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = &assoc_acc[1]; + else + lsreq->private = NULL; assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; assoc_rqst->desc_list_len = @@ -1267,11 +1264,13 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = be64_to_cpu(assoc_acc->associd.association_id); queue->connection_id = be64_to_cpu(assoc_acc->connectid.connection_id); set_bit(NVME_FC_Q_CONNECTED, &queue->flags); + spin_unlock_irqrestore(&ctrl->lock, flags); } out_free_buffer: @@ -1295,18 +1294,23 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, int ret, fcret = 0; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); + sizeof(*conn_rqst) + sizeof(*conn_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Create Connection failed: ENOMEM\n", + ctrl->cnum); ret = -ENOMEM; goto out_no_memory; } - lsreq = &lsop->ls_req; - lsreq->private = (void *)&lsop[1]; - conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1]; conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; + lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&conn_acc[1]; + else + lsreq->private = NULL; conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; conn_rqst->desc_list_len = cpu_to_be32( @@ -1420,54 +1424,385 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) int ret; lsop = kzalloc((sizeof(*lsop) + - ctrl->lport->ops->lsrqst_priv_sz + - sizeof(*discon_rqst) + sizeof(*discon_acc)), - GFP_KERNEL); - if (!lsop) - /* couldn't sent it... too bad */ + sizeof(*discon_rqst) + sizeof(*discon_acc) + + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(ctrl->ctrl.device, + "NVME-FC{%d}: send Disconnect Association " + "failed: ENOMEM\n", + ctrl->cnum); return; + } + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; lsreq = &lsop->ls_req; + if (ctrl->lport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; - lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); - discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + ctrl->association_id); - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; - discon_rqst->desc_list_len = cpu_to_be32( - sizeof(struct fcnvme_lsdesc_assoc_id) + - sizeof(struct fcnvme_lsdesc_disconn_cmd)); + ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, + nvme_fc_disconnect_assoc_done); + if (ret) + kfree(lsop); +} - discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); - discon_rqst->associd.desc_len = - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id)); +static void +nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) +{ + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + list_del(&lsop->lsrcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + kfree(lsop); + + nvme_fc_rport_put(rport); +} + +static void +nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + int ret; + + fc_dma_sync_single_for_device(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); + + ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport, + lsop->lsrsp); + if (ret) { + dev_warn(lport->dev, + "LLDD rejected LS RSP xmt: LS %d status %d\n", + w0->ls_cmd, ret); + nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); + return; + } +} + +static struct nvme_fc_ctrl * +nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, + struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct nvme_fc_ctrl *ctrl, *ret = NULL; + struct nvmefc_ls_rcv_op *oldls = NULL; + u64 association_id = be64_to_cpu(rqst->associd.association_id); + unsigned long flags; + + spin_lock_irqsave(&rport->lock, flags); + + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { + if (!nvme_fc_ctrl_get(ctrl)) + continue; + spin_lock(&ctrl->lock); + if (association_id == ctrl->association_id) { + oldls = ctrl->rcv_disconn; + ctrl->rcv_disconn = lsop; + ret = ctrl; + } + spin_unlock(&ctrl->lock); + if (ret) + /* leave the ctrl get reference */ + break; + nvme_fc_ctrl_put(ctrl); + } + + spin_unlock_irqrestore(&rport->lock, flags); + + /* transmit a response for anything that was pending */ + if (oldls) { + dev_info(rport->lport->dev, + "NVME-FC{%d}: Multiple Disconnect Association " + "LS's received\n", ctrl->cnum); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*oldls->rspbuf), + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvme_fc_xmt_ls_rsp(oldls); + } + + return ret; +} + +/* + * returns true to mean LS handled and ls_rsp can be sent + * returns false to defer ls_rsp xmt (will be done as part of + * association termination) + */ +static bool +nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) +{ + struct nvme_fc_rport *rport = lsop->rport; + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + &lsop->rqstbuf->rq_dis_assoc; + struct fcnvme_ls_disconnect_assoc_acc *acc = + &lsop->rspbuf->rsp_dis_assoc; + struct nvme_fc_ctrl *ctrl = NULL; + int ret = 0; + + memset(acc, 0, sizeof(*acc)); + + ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst); + if (!ret) { + /* match an active association */ + ctrl = nvme_fc_match_disconn_ls(rport, lsop); + if (!ctrl) + ret = VERR_NO_ASSOC; + } + + if (ret) { + dev_info(rport->lport->dev, + "Disconnect LS failed: %s\n", + validation_errors[ret]); + lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, + (ret == VERR_NO_ASSOC) ? + FCNVME_RJT_RC_INV_ASSOC : + FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_EXP_NONE, 0); + return true; + } - discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); + /* format an ACCept response */ - discon_rqst->discon_cmd.desc_tag = cpu_to_be32( - FCNVME_LSDESC_DISCONN_CMD); - discon_rqst->discon_cmd.desc_len = + lsop->lsrsp->rsplen = sizeof(*acc); + + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd)); + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), + FCNVME_LS_DISCONNECT_ASSOC); - lsreq->rqstaddr = discon_rqst; - lsreq->rqstlen = sizeof(*discon_rqst); - lsreq->rspaddr = discon_acc; - lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + /* + * the transmit of the response will occur after the exchanges + * for the association have been ABTS'd by + * nvme_fc_delete_association(). + */ - ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, - nvme_fc_disconnect_assoc_done); - if (ret) - kfree(lsop); + /* fail the association */ + nvme_fc_error_recovery(ctrl, "Disconnect Association LS received"); + + /* release the reference taken by nvme_fc_match_disconn_ls() */ + nvme_fc_ctrl_put(ctrl); + + return false; } +/* + * Actual Processing routine for received FC-NVME LS Requests from the LLD + * returns true if a response should be sent afterward, false if rsp will + * be sent asynchronously. + */ +static bool +nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) +{ + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; + bool ret = true; + + lsop->lsrsp->nvme_fc_private = lsop; + lsop->lsrsp->rspbuf = lsop->rspbuf; + lsop->lsrsp->rspdma = lsop->rspdma; + lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; + /* Be preventative. handlers will later set to valid length */ + lsop->lsrsp->rsplen = 0; -/* *********************** NVME Ctrl Routines **************************** */ + /* + * handlers: + * parse request input, execute the request, and format the + * LS response + */ + switch (w0->ls_cmd) { + case FCNVME_LS_DISCONNECT_ASSOC: + ret = nvme_fc_ls_disconnect_assoc(lsop); + break; + case FCNVME_LS_DISCONNECT_CONN: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0); + break; + case FCNVME_LS_CREATE_ASSOCIATION: + case FCNVME_LS_CREATE_CONNECTION: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); + break; + default: + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, + sizeof(*lsop->rspbuf), w0->ls_cmd, + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); + break; + } -static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); + return(ret); +} + +static void +nvme_fc_handle_ls_rqst_work(struct work_struct *work) +{ + struct nvme_fc_rport *rport = + container_of(work, struct nvme_fc_rport, lsrcv_work); + struct fcnvme_ls_rqst_w0 *w0; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + bool sendrsp; + +restart: + sendrsp = true; + spin_lock_irqsave(&rport->lock, flags); + list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { + if (lsop->handled) + continue; + + lsop->handled = true; + if (rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + sendrsp = nvme_fc_handle_ls_rqst(lsop); + } else { + spin_unlock_irqrestore(&rport->lock, flags); + w0 = &lsop->rqstbuf->w0; + lsop->lsrsp->rsplen = nvme_fc_format_rjt( + lsop->rspbuf, + sizeof(*lsop->rspbuf), + w0->ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + } + if (sendrsp) + nvme_fc_xmt_ls_rsp(lsop); + goto restart; + } + spin_unlock_irqrestore(&rport->lock, flags); +} + +/** + * nvme_fc_rcv_ls_req - transport entry point called by an LLDD + * upon the reception of a NVME LS request. + * + * The nvme-fc layer will copy payload to an internal structure for + * processing. As such, upon completion of the routine, the LLDD may + * immediately free/reuse the LS request buffer passed in the call. + * + * If this routine returns error, the LLDD should abort the exchange. + * + * @remoteport: pointer to the (registered) remote port that the LS + * was received from. The remoteport is associated with + * a specific localport. + * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be + * used to reference the exchange corresponding to the LS + * when issuing an ls response. + * @lsreqbuf: pointer to the buffer containing the LS Request + * @lsreqbuf_len: length, in bytes, of the received LS request + */ +int +nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, + struct nvmefc_ls_rsp *lsrsp, + void *lsreqbuf, u32 lsreqbuf_len) +{ + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); + struct nvme_fc_lport *lport = rport->lport; + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + struct nvmefc_ls_rcv_op *lsop; + unsigned long flags; + int ret; + + nvme_fc_rport_get(rport); + + /* validate there's a routine to transmit a response */ + if (!lport->ops->xmt_ls_rsp) { + dev_info(lport->dev, + "RCV %s LS failed: no LLDD xmt_ls_rsp\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EINVAL; + goto out_put; + } + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(lport->dev, + "RCV %s LS failed: payload too large\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -E2BIG; + goto out_put; + } + + lsop = kzalloc(sizeof(*lsop) + + sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); + if (!lsop) { + dev_info(lport->dev, + "RCV %s LS failed: No memory\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -ENOMEM; + goto out_put; + } + lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1]; + lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1]; + + lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, + sizeof(*lsop->rspbuf), + DMA_TO_DEVICE); + if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) { + dev_info(lport->dev, + "RCV %s LS failed: DMA mapping failure\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); + ret = -EFAULT; + goto out_free; + } + + lsop->rport = rport; + lsop->lsrsp = lsrsp; + + memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); + lsop->rqstdatalen = lsreqbuf_len; + + spin_lock_irqsave(&rport->lock, flags); + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { + spin_unlock_irqrestore(&rport->lock, flags); + ret = -ENOTCONN; + goto out_unmap; + } + list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list); + spin_unlock_irqrestore(&rport->lock, flags); + + schedule_work(&rport->lsrcv_work); + + return 0; + +out_unmap: + fc_dma_unmap_single(lport->dev, lsop->rspdma, + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); +out_free: + kfree(lsop); +out_put: + nvme_fc_rport_put(rport); + return ret; +} +EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); + + +/* *********************** NVME Ctrl Routines **************************** */ static void __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, @@ -1500,7 +1835,7 @@ __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); if (opstate != FCPOP_STATE_ACTIVE) atomic_set(&op->state, opstate); - else if (ctrl->flags & FCCTRL_TERMIO) + else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) ctrl->iocnt++; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -1537,7 +1872,7 @@ __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, if (opstate == FCPOP_STATE_ABORTED) { spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) { + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { if (!--ctrl->iocnt) wake_up(&ctrl->ioabort_wait); } @@ -1771,7 +2106,7 @@ nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); if (res) return res; - op->op.fcp_req.first_sgl = &op->sgl[0]; + op->op.fcp_req.first_sgl = op->sgl; op->op.fcp_req.private = &op->priv[0]; nvme_req(rq)->ctrl = &ctrl->ctrl; return res; @@ -1783,15 +2118,17 @@ nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) struct nvme_fc_fcp_op *aen_op; struct nvme_fc_cmd_iu *cmdiu; struct nvme_command *sqe; - void *private; + void *private = NULL; int i, ret; aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, + if (ctrl->lport->ops->fcprqst_priv_sz) { + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, GFP_KERNEL); - if (!private) - return -ENOMEM; + if (!private) + return -ENOMEM; + } cmdiu = &aen_op->cmd_iu; sqe = &cmdiu->sqe; @@ -1822,9 +2159,6 @@ nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) aen_op = ctrl->aen_ops; for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { - if (!aen_op->fcp_req.private) - continue; - __nvme_fc_exit_request(ctrl, aen_op); kfree(aen_op->fcp_req.private); @@ -2366,16 +2700,9 @@ nvme_fc_submit_async_event(struct nvme_ctrl *arg) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); struct nvme_fc_fcp_op *aen_op; - unsigned long flags; - bool terminating = false; blk_status_t ret; - spin_lock_irqsave(&ctrl->lock, flags); - if (ctrl->flags & FCCTRL_TERMIO) - terminating = true; - spin_unlock_irqrestore(&ctrl->lock, flags); - - if (terminating) + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) return; aen_op = &ctrl->aen_ops[0]; @@ -2584,10 +2911,9 @@ nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_rport *rport = ctrl->rport; u32 cnt; - if (ctrl->assoc_active) + if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags)) return 1; - ctrl->assoc_active = true; cnt = atomic_inc_return(&rport->act_ctrl_cnt); if (cnt == 1) nvme_fc_rport_active_on_lport(rport); @@ -2602,7 +2928,7 @@ nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) struct nvme_fc_lport *lport = rport->lport; u32 cnt; - /* ctrl->assoc_active=false will be set independently */ + /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ cnt = atomic_dec_return(&rport->act_ctrl_cnt); if (cnt == 0) { @@ -2622,6 +2948,8 @@ static int nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; + struct nvmefc_ls_rcv_op *disls = NULL; + unsigned long flags; int ret; bool changed; @@ -2739,12 +3067,18 @@ out_term_aen_ops: out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + nvme_fc_xmt_ls_rsp(disls); out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); - ctrl->assoc_active = false; + clear_bit(ASSOC_ACTIVE, &ctrl->flags); nvme_fc_ctlr_inactive_on_rport(ctrl); return ret; @@ -2759,14 +3093,14 @@ out_free_queue: static void nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) { + struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; - if (!ctrl->assoc_active) + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) return; - ctrl->assoc_active = false; spin_lock_irqsave(&ctrl->lock, flags); - ctrl->flags |= FCCTRL_TERMIO; + set_bit(FCCTRL_TERMIO, &ctrl->flags); ctrl->iocnt = 0; spin_unlock_irqrestore(&ctrl->lock, flags); @@ -2817,7 +3151,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) /* wait for all io that had to be aborted */ spin_lock_irq(&ctrl->lock); wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); - ctrl->flags &= ~FCCTRL_TERMIO; + clear_bit(FCCTRL_TERMIO, &ctrl->flags); spin_unlock_irq(&ctrl->lock); nvme_fc_term_aen_ops(ctrl); @@ -2831,7 +3165,17 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if (ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + spin_lock_irqsave(&ctrl->lock, flags); ctrl->association_id = 0; + disls = ctrl->rcv_disconn; + ctrl->rcv_disconn = NULL; + spin_unlock_irqrestore(&ctrl->lock, flags); + if (disls) + /* + * if a Disconnect Request was waiting for a response, send + * now that all ABTS's have been issued (and are complete). + */ + nvme_fc_xmt_ls_rsp(disls); if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); @@ -2902,7 +3246,9 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: dev_loss_tmo (%d) expired " "while waiting for remoteport connectivity.\n", - ctrl->cnum, portptr->dev_loss_tmo); + ctrl->cnum, min_t(int, portptr->dev_loss_tmo, + (ctrl->ctrl.opts->max_reconnects * + ctrl->ctrl.opts->reconnect_delay))); WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); } } @@ -3089,7 +3435,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - ctrl->assoc_active = false; atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); diff --git a/drivers/nvme/host/fc.h b/drivers/nvme/host/fc.h new file mode 100644 index 000000000000..05ce566f2caf --- /dev/null +++ b/drivers/nvme/host/fc.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2016, Avago Technologies + */ + +#ifndef _NVME_FC_TRANSPORT_H +#define _NVME_FC_TRANSPORT_H 1 + + +/* + * Common definitions between the nvme_fc (host) transport and + * nvmet_fc (target) transport implementation. + */ + +/* + * ****************** FC-NVME LS HANDLING ****************** + */ + +union nvmefc_ls_requests { + struct fcnvme_ls_rqst_w0 w0; + struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; + struct fcnvme_ls_cr_conn_rqst rq_cr_conn; + struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; + struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + +union nvmefc_ls_responses { + struct fcnvme_ls_rjt rsp_rjt; + struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc; + struct fcnvme_ls_cr_conn_acc rsp_cr_conn; + struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc; + struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn; +} __aligned(128); /* alignment for other things alloc'd with */ + +static inline void +nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) +{ + struct fcnvme_ls_acc_hdr *acc = buf; + + acc->w0.ls_cmd = ls_cmd; + acc->desc_list_len = desc_len; + acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); + acc->rqst.desc_len = + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); + acc->rqst.w0.ls_cmd = rqst_ls_cmd; +} + +static inline int +nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, + u8 reason, u8 explanation, u8 vendor) +{ + struct fcnvme_ls_rjt *rjt = buf; + + nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), + ls_cmd); + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); + rjt->rjt.reason_code = reason; + rjt->rjt.reason_explanation = explanation; + rjt->rjt.vendor = vendor; + + return sizeof(struct fcnvme_ls_rjt); +} + +/* Validation Error indexes into the string table below */ +enum { + VERR_NO_ERROR = 0, + VERR_CR_ASSOC_LEN = 1, + VERR_CR_ASSOC_RQST_LEN = 2, + VERR_CR_ASSOC_CMD = 3, + VERR_CR_ASSOC_CMD_LEN = 4, + VERR_ERSP_RATIO = 5, + VERR_ASSOC_ALLOC_FAIL = 6, + VERR_QUEUE_ALLOC_FAIL = 7, + VERR_CR_CONN_LEN = 8, + VERR_CR_CONN_RQST_LEN = 9, + VERR_ASSOC_ID = 10, + VERR_ASSOC_ID_LEN = 11, + VERR_NO_ASSOC = 12, + VERR_CONN_ID = 13, + VERR_CONN_ID_LEN = 14, + VERR_INVAL_CONN = 15, + VERR_CR_CONN_CMD = 16, + VERR_CR_CONN_CMD_LEN = 17, + VERR_DISCONN_LEN = 18, + VERR_DISCONN_RQST_LEN = 19, + VERR_DISCONN_CMD = 20, + VERR_DISCONN_CMD_LEN = 21, + VERR_DISCONN_SCOPE = 22, + VERR_RS_LEN = 23, + VERR_RS_RQST_LEN = 24, + VERR_RS_CMD = 25, + VERR_RS_CMD_LEN = 26, + VERR_RS_RCTL = 27, + VERR_RS_RO = 28, + VERR_LSACC = 29, + VERR_LSDESC_RQST = 30, + VERR_LSDESC_RQST_LEN = 31, + VERR_CR_ASSOC = 32, + VERR_CR_ASSOC_ACC_LEN = 33, + VERR_CR_CONN = 34, + VERR_CR_CONN_ACC_LEN = 35, + VERR_DISCONN = 36, + VERR_DISCONN_ACC_LEN = 37, +}; + +static char *validation_errors[] = { + "OK", + "Bad CR_ASSOC Length", + "Bad CR_ASSOC Rqst Length", + "Not CR_ASSOC Cmd", + "Bad CR_ASSOC Cmd Length", + "Bad Ersp Ratio", + "Association Allocation Failed", + "Queue Allocation Failed", + "Bad CR_CONN Length", + "Bad CR_CONN Rqst Length", + "Not Association ID", + "Bad Association ID Length", + "No Association", + "Not Connection ID", + "Bad Connection ID Length", + "Invalid Connection ID", + "Not CR_CONN Cmd", + "Bad CR_CONN Cmd Length", + "Bad DISCONN Length", + "Bad DISCONN Rqst Length", + "Not DISCONN Cmd", + "Bad DISCONN Cmd Length", + "Bad Disconnect Scope", + "Bad RS Length", + "Bad RS Rqst Length", + "Not RS Cmd", + "Bad RS Cmd Length", + "Bad RS R_CTL", + "Bad RS Relative Offset", + "Not LS_ACC", + "Not LSDESC_RQST", + "Bad LSDESC_RQST Length", + "Not CR_ASSOC Rqst", + "Bad CR_ASSOC ACC Length", + "Not CR_CONN Rqst", + "Bad CR_CONN ACC Length", + "Not Disconnect Rqst", + "Bad Disconnect ACC Length", +}; + +#define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN + +static char *nvmefc_ls_names[] = { + "Reserved (0)", + "RJT (1)", + "ACC (2)", + "Create Association", + "Create Connection", + "Disconnect Association", + "Disconnect Connection", +}; + +static inline void +nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq, + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst, + struct fcnvme_ls_disconnect_assoc_acc *discon_acc, + u64 association_id) +{ + lsreq->rqstaddr = discon_rqst; + lsreq->rqstlen = sizeof(*discon_rqst); + lsreq->rspaddr = discon_acc; + lsreq->rsplen = sizeof(*discon_acc); + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; + + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; + discon_rqst->desc_list_len = cpu_to_be32( + sizeof(struct fcnvme_lsdesc_assoc_id) + + sizeof(struct fcnvme_lsdesc_disconn_cmd)); + + discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); + discon_rqst->associd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id)); + + discon_rqst->associd.association_id = cpu_to_be64(association_id); + + discon_rqst->discon_cmd.desc_tag = cpu_to_be32( + FCNVME_LSDESC_DISCONN_CMD); + discon_rqst->discon_cmd.desc_len = + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd)); +} + +static inline int +nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen, + struct fcnvme_ls_disconnect_assoc_rqst *rqst) +{ + int ret = 0; + + if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) + ret = VERR_DISCONN_LEN; + else if (rqst->desc_list_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) + ret = VERR_DISCONN_RQST_LEN; + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) + ret = VERR_ASSOC_ID; + else if (rqst->associd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_assoc_id))) + ret = VERR_ASSOC_ID_LEN; + else if (rqst->discon_cmd.desc_tag != + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) + ret = VERR_DISCONN_CMD; + else if (rqst->discon_cmd.desc_len != + fcnvme_lsdesc_len( + sizeof(struct fcnvme_lsdesc_disconn_cmd))) + ret = VERR_DISCONN_CMD_LEN; + /* + * As the standard changed on the LS, check if old format and scope + * something other than Association (e.g. 0). + */ + else if (rqst->discon_cmd.rsvd8[0]) + ret = VERR_DISCONN_SCOPE; + + return ret; +} + +#endif /* _NVME_FC_TRANSPORT_H */ diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c index ec46693f6b64..69608755d415 100644 --- a/drivers/nvme/host/lightnvm.c +++ b/drivers/nvme/host/lightnvm.c @@ -171,7 +171,7 @@ struct nvme_nvm_bb_tbl { __le32 tdresv; __le32 thresv; __le32 rsvd2[8]; - __u8 blk[0]; + __u8 blk[]; }; struct nvme_nvm_id20_addrf { @@ -961,7 +961,10 @@ int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node) geo = &dev->geo; geo->csecs = 1 << ns->lba_shift; geo->sos = ns->ms; - geo->ext = ns->ext; + if (ns->features & NVME_NS_EXT_LBAS) + geo->ext = true; + else + geo->ext = false; geo->mdts = ns->ctrl->max_hw_sectors; dev->q = q; diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 54603bd3e02d..da78e499947a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -3,6 +3,7 @@ * Copyright (c) 2017-2018 Christoph Hellwig. */ +#include <linux/backing-dev.h> #include <linux/moduleparam.h> #include <trace/events/block.h> #include "nvme.h" @@ -293,7 +294,7 @@ static bool nvme_available_path(struct nvme_ns_head *head) static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, struct bio *bio) { - struct nvme_ns_head *head = q->queuedata; + struct nvme_ns_head *head = bio->bi_disk->private_data; struct device *dev = disk_to_dev(head->disk); struct nvme_ns *ns; blk_qc_t ret = BLK_QC_T_NONE; @@ -371,13 +372,12 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) * We also do this for private namespaces as the namespace sharing data could * change after a rescan. */ - if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) return 0; q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); if (!q) goto out; - q->queuedata = head; blk_queue_flag_set(QUEUE_FLAG_NONROT, q); /* set to a default value for 512 until disk is validated */ blk_queue_logical_block_size(q, 512); @@ -666,6 +666,13 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id) nvme_mpath_set_live(ns); mutex_unlock(&ns->head->lock); } + + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { + struct backing_dev_info *info = + ns->head->disk->queue->backing_dev_info; + + info->capabilities |= BDI_CAP_STABLE_WRITES; + } } void nvme_mpath_remove_disk(struct nvme_ns_head *head) @@ -687,7 +694,8 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) int error; /* check if multipath is enabled and we have the capability */ - if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) + if (!multipath || !ctrl->subsys || + !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) return 0; ctrl->anacap = id->anacap; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2e04a36296d9..fa5c75501049 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -16,6 +16,7 @@ #include <linux/fault-inject.h> #include <linux/rcupdate.h> #include <linux/wait.h> +#include <linux/t10-pi.h> #include <trace/events/block.h> @@ -30,8 +31,10 @@ extern unsigned int admin_timeout; #ifdef CONFIG_ARCH_NO_SG_CHAIN #define NVME_INLINE_SG_CNT 0 +#define NVME_INLINE_METADATA_SG_CNT 0 #else #define NVME_INLINE_SG_CNT 2 +#define NVME_INLINE_METADATA_SG_CNT 1 #endif extern struct workqueue_struct *nvme_wq; @@ -228,6 +231,7 @@ struct nvme_ctrl { u32 page_size; u32 max_hw_sectors; u32 max_segments; + u32 max_integrity_segments; u16 crdt[3]; u16 oncs; u16 oacs; @@ -352,6 +356,7 @@ struct nvme_ns_head { struct nvme_ns_ids ids; struct list_head entry; struct kref ref; + bool shared; int instance; #ifdef CONFIG_NVME_MULTIPATH struct gendisk *disk; @@ -363,6 +368,11 @@ struct nvme_ns_head { #endif }; +enum nvme_ns_features { + NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ + NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ +}; + struct nvme_ns { struct list_head list; @@ -382,18 +392,23 @@ struct nvme_ns { u16 ms; u16 sgs; u32 sws; - bool ext; u8 pi_type; + unsigned long features; unsigned long flags; #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 #define NVME_NS_ANA_PENDING 2 - u16 noiob; struct nvme_fault_inject fault_inject; }; +/* NVMe ns supports metadata actions by the controller (generate/strip) */ +static inline bool nvme_ns_has_pi(struct nvme_ns *ns) +{ + return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); +} + struct nvme_ctrl_ops { const char *name; struct module *module; @@ -449,6 +464,14 @@ static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) return lba << (ns->lba_shift - SECTOR_SHIFT); } +/* + * Convert byte length to nvme's 0-based num dwords + */ +static inline u32 nvme_bytes_to_numd(size_t len) +{ + return (len >> 2) - 1; +} + static inline void nvme_end_request(struct request *req, __le16 status, union nvme_result result) { @@ -489,7 +512,6 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl); void nvme_stop_ctrl(struct nvme_ctrl *ctrl); -void nvme_put_ctrl(struct nvme_ctrl *ctrl); int nvme_init_identify(struct nvme_ctrl *ctrl); void nvme_remove_namespaces(struct nvme_ctrl *ctrl); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cc46e250fcac..d690d5593a80 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -68,14 +68,30 @@ static int io_queue_depth = 1024; module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); +static int io_queue_count_set(const char *val, const struct kernel_param *kp) +{ + unsigned int n; + int ret; + + ret = kstrtouint(val, 10, &n); + if (ret != 0 || n > num_possible_cpus()) + return -EINVAL; + return param_set_uint(val, kp); +} + +static const struct kernel_param_ops io_queue_count_ops = { + .set = io_queue_count_set, + .get = param_get_uint, +}; + static unsigned int write_queues; -module_param(write_queues, uint, 0644); +module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644); MODULE_PARM_DESC(write_queues, "Number of queues to use for writes. If not set, reads and writes " "will share a queue set."); static unsigned int poll_queues; -module_param(poll_queues, uint, 0644); +module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); struct nvme_dev; @@ -128,6 +144,9 @@ struct nvme_dev { dma_addr_t host_mem_descs_dma; struct nvme_host_mem_buf_desc *host_mem_descs; void **host_mem_desc_bufs; + unsigned int nr_allocated_queues; + unsigned int nr_write_queues; + unsigned int nr_poll_queues; }; static int io_queue_depth_set(const char *val, const struct kernel_param *kp) @@ -166,14 +185,13 @@ struct nvme_queue { void *sq_cmds; /* only used for poll queues: */ spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; - volatile struct nvme_completion *cqes; + struct nvme_completion *cqes; dma_addr_t sq_dma_addr; dma_addr_t cq_dma_addr; u32 __iomem *q_db; u16 q_depth; u16 cq_vector; u16 sq_tail; - u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -209,25 +227,14 @@ struct nvme_iod { struct scatterlist *sg; }; -static unsigned int max_io_queues(void) +static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) { - return num_possible_cpus() + write_queues + poll_queues; -} - -static unsigned int max_queue_count(void) -{ - /* IO queues + admin queue */ - return 1 + max_io_queues(); -} - -static inline unsigned int nvme_dbbuf_size(u32 stride) -{ - return (max_queue_count() * 8 * stride); + return dev->nr_allocated_queues * 8 * dev->db_stride; } static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) return 0; @@ -252,7 +259,7 @@ static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) static void nvme_dbbuf_dma_free(struct nvme_dev *dev) { - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); + unsigned int mem_size = nvme_dbbuf_size(dev); if (dev->dbbuf_dbs) { dma_free_coherent(dev->dev, mem_size, @@ -446,24 +453,11 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -/* - * Write sq tail if we are asked to, or if the next command would wrap. - */ -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) { - if (!write_sq) { - u16 next_tail = nvmeq->sq_tail + 1; - - if (next_tail == nvmeq->q_depth) - next_tail = 0; - if (next_tail != nvmeq->last_sq_tail) - return; - } - if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); - nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -480,7 +474,8 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - nvme_write_sq_db(nvmeq, write_sq); + if (write_sq) + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -489,8 +484,7 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - if (nvmeq->sq_tail != nvmeq->last_sq_tail) - nvme_write_sq_db(nvmeq, true); + nvme_write_sq_db(nvmeq); spin_unlock(&nvmeq->sq_lock); } @@ -922,8 +916,9 @@ static void nvme_pci_complete_rq(struct request *req) /* We read the CQE phase first to check if the rest of the entry is valid */ static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) { - return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == - nvmeq->cq_phase; + struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head]; + + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase; } static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) @@ -944,7 +939,7 @@ static inline struct blk_mq_tags *nvme_queue_tagset(struct nvme_queue *nvmeq) static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) { - volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; + struct nvme_completion *cqe = &nvmeq->cqes[idx]; struct request *req; if (unlikely(cqe->command_id >= nvmeq->q_depth)) { @@ -1501,7 +1496,6 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; - nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; @@ -2003,7 +1997,7 @@ static int nvme_setup_host_mem(struct nvme_dev *dev) static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) { struct nvme_dev *dev = affd->priv; - unsigned int nr_read_queues; + unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; /* * If there is no interupt available for queues, ensure that @@ -2019,12 +2013,12 @@ static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) if (!nrirqs) { nrirqs = 1; nr_read_queues = 0; - } else if (nrirqs == 1 || !write_queues) { + } else if (nrirqs == 1 || !nr_write_queues) { nr_read_queues = 0; - } else if (write_queues >= nrirqs) { + } else if (nr_write_queues >= nrirqs) { nr_read_queues = 1; } else { - nr_read_queues = nrirqs - write_queues; + nr_read_queues = nrirqs - nr_write_queues; } dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; @@ -2048,7 +2042,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) * Poll queues don't need interrupts, but we need at least one IO * queue left over for non-polled IO. */ - this_p_queues = poll_queues; + this_p_queues = dev->nr_poll_queues; if (this_p_queues >= nr_io_queues) { this_p_queues = nr_io_queues - 1; irq_queues = 1; @@ -2078,14 +2072,25 @@ static void nvme_disable_io_queues(struct nvme_dev *dev) __nvme_disable_io_queues(dev, nvme_admin_delete_cq); } +static unsigned int nvme_max_io_queues(struct nvme_dev *dev) +{ + return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, nr_io_queues; + unsigned int nr_io_queues; unsigned long size; + int result; - nr_io_queues = max_io_queues(); + /* + * Sample the module parameters once at reset time so that we have + * stable values to work with. + */ + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; /* * If tags are shared with admin queue (Apple bug), then @@ -2093,6 +2098,9 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) */ if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) nr_io_queues = 1; + else + nr_io_queues = min(nvme_max_io_queues(dev), + dev->nr_allocated_queues - 1); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) @@ -2565,6 +2573,12 @@ static void nvme_reset_work(struct work_struct *work) goto out; } + /* + * We do not support an SGL for metadata (yet), so we are limited to a + * single integrity segment for the separate metadata pointer. + */ + dev->ctrl.max_integrity_segments = 1; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; @@ -2767,8 +2781,11 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) if (!dev) return -ENOMEM; - dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), - GFP_KERNEL, node); + dev->nr_write_queues = write_queues; + dev->nr_poll_queues = poll_queues; + dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1; + dev->queues = kcalloc_node(dev->nr_allocated_queues, + sizeof(struct nvme_queue), GFP_KERNEL, node); if (!dev->queues) goto free; @@ -3131,8 +3148,6 @@ static int __init nvme_init(void) BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); - write_queues = min(write_queues, num_possible_cpus()); - poll_queues = min(poll_queues, num_possible_cpus()); return pci_register_driver(&nvme_driver); } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cac8a930396a..f8f856dc0c67 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -34,6 +34,11 @@ #define NVME_RDMA_MAX_INLINE_SEGMENTS 4 +#define NVME_RDMA_DATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT) +#define NVME_RDMA_METADATA_SGL_SIZE \ + (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT) + struct nvme_rdma_device { struct ib_device *dev; struct ib_pd *pd; @@ -48,6 +53,11 @@ struct nvme_rdma_qe { u64 dma; }; +struct nvme_rdma_sgl { + int nents; + struct sg_table sg_table; +}; + struct nvme_rdma_queue; struct nvme_rdma_request { struct nvme_request req; @@ -58,12 +68,12 @@ struct nvme_rdma_request { refcount_t ref; struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; u32 num_sge; - int nents; struct ib_reg_wr reg_wr; struct ib_cqe reg_cqe; struct nvme_rdma_queue *queue; - struct sg_table sg_table; - struct scatterlist first_sgl[]; + struct nvme_rdma_sgl data_sgl; + struct nvme_rdma_sgl *metadata_sgl; + bool use_sig_mr; }; enum nvme_rdma_queue_flags { @@ -85,6 +95,7 @@ struct nvme_rdma_queue { struct rdma_cm_id *cm_id; int cm_error; struct completion cm_done; + bool pi_support; }; struct nvme_rdma_ctrl { @@ -261,6 +272,8 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) init_attr.qp_type = IB_QPT_RC; init_attr.send_cq = queue->ib_cq; init_attr.recv_cq = queue->ib_cq; + if (queue->pi_support) + init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr); @@ -290,6 +303,12 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, if (!req->sqe.data) return -ENOMEM; + /* metadata nvme_rdma_sgl struct is located after command's data SGL */ + if (queue->pi_support) + req->metadata_sgl = (void *)nvme_req(rq) + + sizeof(struct nvme_rdma_request) + + NVME_RDMA_DATA_SGL_SIZE; + req->queue = queue; return 0; @@ -400,6 +419,8 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) dev = queue->device; ibdev = dev->dev; + if (queue->pi_support) + ib_mr_pool_destroy(queue->qp, &queue->qp->sig_mrs); ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); /* @@ -416,10 +437,16 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) nvme_rdma_dev_put(dev); } -static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) +static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support) { - return min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ibdev->attrs.max_fast_reg_page_list_len - 1); + u32 max_page_list_len; + + if (pi_support) + max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len; + else + max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len; + + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1); } static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) @@ -476,7 +503,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) * misaligned we'll end up using two entries for a single data page, * so one additional entry is required. */ - pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1; ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, queue->queue_size, IB_MR_TYPE_MEM_REG, @@ -488,10 +515,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) goto out_destroy_ring; } + if (queue->pi_support) { + ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs, + queue->queue_size, IB_MR_TYPE_INTEGRITY, + pages_per_mr, pages_per_mr); + if (ret) { + dev_err(queue->ctrl->ctrl.device, + "failed to initialize PI MR pool sized %d for QID %d\n", + queue->queue_size, idx); + goto out_destroy_mr_pool; + } + } + set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); return 0; +out_destroy_mr_pool: + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); out_destroy_ring: nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, sizeof(struct nvme_completion), DMA_FROM_DEVICE); @@ -513,6 +554,10 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, queue = &ctrl->queues[idx]; queue->ctrl = ctrl; + if (idx && ctrl->ctrl.max_integrity_segments) + queue->pi_support = true; + else + queue->pi_support = false; init_completion(&queue->cm_done); if (idx > 0) @@ -723,7 +768,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = nctrl->numa_node; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = 1; set->timeout = ADMIN_TIMEOUT; @@ -737,7 +782,10 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, set->numa_node = nctrl->numa_node; set->flags = BLK_MQ_F_SHOULD_MERGE; set->cmd_size = sizeof(struct nvme_rdma_request) + - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); + NVME_RDMA_DATA_SGL_SIZE; + if (nctrl->max_integrity_segments) + set->cmd_size += sizeof(struct nvme_rdma_sgl) + + NVME_RDMA_METADATA_SGL_SIZE; set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; set->timeout = NVME_IO_TIMEOUT; @@ -770,6 +818,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, bool new) { + bool pi_capable = false; int error; error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); @@ -779,7 +828,13 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->device = ctrl->queues[0].device; ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); - ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); + /* T10-PI support */ + if (ctrl->device->dev->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER) + pi_capable = true; + + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, + pi_capable); /* * Bind the async event SQE DMA mapping to the admin queue lifetime. @@ -821,6 +876,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, ctrl->ctrl.max_segments = ctrl->max_fr_pages; ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); + if (pi_capable) + ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages; + else + ctrl->ctrl.max_integrity_segments = 0; blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); @@ -1149,17 +1208,29 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + struct list_head *pool = &queue->qp->rdma_mrs; if (!blk_rq_nr_phys_segments(rq)) return; + if (blk_integrity_rq(rq)) { + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); + } + + if (req->use_sig_mr) + pool = &queue->qp->sig_mrs; + if (req->mr) { - ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); + ib_mr_pool_put(queue->qp, pool, req->mr); req->mr = NULL; } - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); } static int nvme_rdma_set_sg_null(struct nvme_command *c) @@ -1178,7 +1249,7 @@ static int nvme_rdma_map_sg_inline(struct nvme_rdma_queue *queue, int count) { struct nvme_sgl_desc *sg = &c->common.dptr.sgl; - struct scatterlist *sgl = req->sg_table.sgl; + struct scatterlist *sgl = req->data_sgl.sg_table.sgl; struct ib_sge *sge = &req->sge[1]; u32 len = 0; int i; @@ -1203,8 +1274,8 @@ static int nvme_rdma_map_sg_single(struct nvme_rdma_queue *queue, { struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; - sg->addr = cpu_to_le64(sg_dma_address(req->sg_table.sgl)); - put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); + sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl)); + put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length); put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; return 0; @@ -1225,7 +1296,8 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, * Align the MR to a 4K page size to match the ctrl page size and * the block virtual boundary. */ - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); + nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, + SZ_4K); if (unlikely(nr < count)) { ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); req->mr = NULL; @@ -1256,12 +1328,125 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, return 0; } +static void nvme_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs, + u8 pi_type) +{ + u16 control = le16_to_cpu(cmd->rw.control); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no memory domain */ + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } +} + +static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) +{ + *mask = 0; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) + *mask |= IB_SIG_CHECK_REFTAG; + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) + *mask |= IB_SIG_CHECK_GUARD; +} + +static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) +{ + if (unlikely(wc->status != IB_WC_SUCCESS)) + nvme_rdma_wr_error(cq, wc, "SIG"); +} + +static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, + struct nvme_rdma_request *req, struct nvme_command *c, + int count, int pi_count) +{ + struct nvme_rdma_sgl *sgl = &req->data_sgl; + struct ib_reg_wr *wr = &req->reg_wr; + struct request *rq = blk_mq_rq_from_pdu(req); + struct nvme_ns *ns = rq->q->queuedata; + struct bio *bio = rq->bio; + struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; + int nr; + + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); + if (WARN_ON_ONCE(!req->mr)) + return -EAGAIN; + + nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL, + req->metadata_sgl->sg_table.sgl, pi_count, NULL, + SZ_4K); + if (unlikely(nr)) + goto mr_put; + + nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c, + req->mr->sig_attrs, ns->pi_type); + nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); + + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); + + req->reg_cqe.done = nvme_rdma_sig_done; + memset(wr, 0, sizeof(*wr)); + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; + wr->wr.wr_cqe = &req->reg_cqe; + wr->wr.num_sge = 0; + wr->wr.send_flags = 0; + wr->mr = req->mr; + wr->key = req->mr->rkey; + wr->access = IB_ACCESS_LOCAL_WRITE | + IB_ACCESS_REMOTE_READ | + IB_ACCESS_REMOTE_WRITE; + + sg->addr = cpu_to_le64(req->mr->iova); + put_unaligned_le24(req->mr->length, sg->length); + put_unaligned_le32(req->mr->rkey, sg->key); + sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; + + return 0; + +mr_put: + ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr); + req->mr = NULL; + if (nr < 0) + return nr; + return -EINVAL; +} + static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct request *rq, struct nvme_command *c) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; + int pi_count = 0; int count, ret; req->num_sge = 1; @@ -1272,22 +1457,52 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, if (!blk_rq_nr_phys_segments(rq)) return nvme_rdma_set_sg_null(c); - req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, - blk_rq_nr_phys_segments(rq), req->sg_table.sgl, + req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); + ret = sg_alloc_table_chained(&req->data_sgl.sg_table, + blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl, NVME_INLINE_SG_CNT); if (ret) return -ENOMEM; - req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); + req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, + req->data_sgl.sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, - rq_dma_dir(rq)); + count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, + req->data_sgl.nents, rq_dma_dir(rq)); if (unlikely(count <= 0)) { ret = -EIO; goto out_free_table; } + if (blk_integrity_rq(rq)) { + req->metadata_sgl->sg_table.sgl = + (struct scatterlist *)(req->metadata_sgl + 1); + ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table, + blk_rq_count_integrity_sg(rq->q, rq->bio), + req->metadata_sgl->sg_table.sgl, + NVME_INLINE_METADATA_SG_CNT); + if (unlikely(ret)) { + ret = -ENOMEM; + goto out_unmap_sg; + } + + req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, + rq->bio, req->metadata_sgl->sg_table.sgl); + pi_count = ib_dma_map_sg(ibdev, + req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, + rq_dma_dir(rq)); + if (unlikely(pi_count <= 0)) { + ret = -EIO; + goto out_free_pi_table; + } + } + + if (req->use_sig_mr) { + ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); + goto out; + } + if (count <= dev->num_inline_segments) { if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && queue->ctrl->use_inline_data && @@ -1306,14 +1521,23 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, ret = nvme_rdma_map_sg_fr(queue, req, c, count); out: if (unlikely(ret)) - goto out_unmap_sg; + goto out_unmap_pi_sg; return 0; +out_unmap_pi_sg: + if (blk_integrity_rq(rq)) + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, + req->metadata_sgl->nents, rq_dma_dir(rq)); +out_free_pi_table: + if (blk_integrity_rq(rq)) + sg_free_table_chained(&req->metadata_sgl->sg_table, + NVME_INLINE_METADATA_SG_CNT); out_unmap_sg: - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, + rq_dma_dir(rq)); out_free_table: - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); return ret; } @@ -1761,6 +1985,15 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && + queue->pi_support && + (c->common.opcode == nvme_cmd_write || + c->common.opcode == nvme_cmd_read) && + nvme_ns_has_pi(ns)) + req->use_sig_mr = true; + else + req->use_sig_mr = false; + err = nvme_rdma_map_data(queue, rq, c); if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, @@ -1801,12 +2034,46 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx) return ib_process_cq_direct(queue->ib_cq, -1); } +static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req) +{ + struct request *rq = blk_mq_rq_from_pdu(req); + struct ib_mr_status mr_status; + int ret; + + ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + nvme_req(rq)->status = NVME_SC_INVALID_PI; + return; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + nvme_req(rq)->status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + nvme_req(rq)->status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + nvme_req(rq)->status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, mr_status.sig_err.expected, + mr_status.sig_err.actual); + } +} + static void nvme_rdma_complete_rq(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; struct ib_device *ibdev = queue->device->dev; + if (req->use_sig_mr) + nvme_rdma_check_pi_status(req); + nvme_rdma_unmap_data(queue, rq); ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -1926,7 +2193,7 @@ out_fail: static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .name = "rdma", .module = THIS_MODULE, - .flags = NVME_F_FABRICS, + .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED, .reg_read32 = nvmf_reg_read32, .reg_read64 = nvmf_reg_read64, .reg_write32 = nvmf_reg_write32, diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index c15a92163c1f..7c7c1886642f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -60,6 +60,7 @@ struct nvme_tcp_request { enum nvme_tcp_queue_flags { NVME_TCP_Q_ALLOCATED = 0, NVME_TCP_Q_LIVE = 1, + NVME_TCP_Q_POLLING = 2, }; enum nvme_tcp_recv_state { @@ -75,6 +76,7 @@ struct nvme_tcp_queue { int io_cpu; spinlock_t lock; + struct mutex send_mutex; struct list_head send_list; /* recv state */ @@ -131,6 +133,7 @@ static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; static struct blk_mq_ops nvme_tcp_mq_ops; static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) { @@ -257,15 +260,29 @@ static inline void nvme_tcp_advance_req(struct nvme_tcp_request *req, } } -static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req) +static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, + bool sync) { struct nvme_tcp_queue *queue = req->queue; + bool empty; spin_lock(&queue->lock); + empty = list_empty(&queue->send_list) && !queue->request; list_add_tail(&req->entry, &queue->send_list); spin_unlock(&queue->lock); - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + /* + * if we're the first on the send_list and we can try to send + * directly, otherwise queue io_work. Also, only do that if we + * are on the same cpu, so we don't introduce contention. + */ + if (queue->io_cpu == smp_processor_id() && + sync && empty && mutex_trylock(&queue->send_mutex)) { + nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + } else { + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); + } } static inline struct nvme_tcp_request * @@ -578,7 +595,7 @@ static int nvme_tcp_handle_r2t(struct nvme_tcp_queue *queue, req->state = NVME_TCP_SEND_H2C_PDU; req->offset = 0; - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, false); return 0; } @@ -794,11 +811,12 @@ static void nvme_tcp_data_ready(struct sock *sk) { struct nvme_tcp_queue *queue; - read_lock(&sk->sk_callback_lock); + read_lock_bh(&sk->sk_callback_lock); queue = sk->sk_user_data; - if (likely(queue && queue->rd_enabled)) + if (likely(queue && queue->rd_enabled) && + !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); - read_unlock(&sk->sk_callback_lock); + read_unlock_bh(&sk->sk_callback_lock); } static void nvme_tcp_write_space(struct sock *sk) @@ -867,7 +885,7 @@ static int nvme_tcp_try_send_data(struct nvme_tcp_request *req) if (last && !queue->data_digest) flags |= MSG_EOR; else - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; /* can't zcopy slab pages */ if (unlikely(PageSlab(page))) { @@ -906,11 +924,16 @@ static int nvme_tcp_try_send_cmd_pdu(struct nvme_tcp_request *req) struct nvme_tcp_queue *queue = req->queue; struct nvme_tcp_cmd_pdu *pdu = req->pdu; bool inline_data = nvme_tcp_has_inline_data(req); - int flags = MSG_DONTWAIT | (inline_data ? MSG_MORE : MSG_EOR); u8 hdgst = nvme_tcp_hdgst_len(queue); int len = sizeof(*pdu) + hdgst - req->offset; + int flags = MSG_DONTWAIT; int ret; + if (inline_data) + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; + else + flags |= MSG_EOR; + if (queue->hdr_digest && !req->offset) nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); @@ -949,7 +972,7 @@ static int nvme_tcp_try_send_data_pdu(struct nvme_tcp_request *req) ret = kernel_sendpage(queue->sock, virt_to_page(pdu), offset_in_page(pdu) + req->offset, len, - MSG_DONTWAIT | MSG_MORE); + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (unlikely(ret <= 0)) return ret; @@ -1063,11 +1086,14 @@ static void nvme_tcp_io_work(struct work_struct *w) bool pending = false; int result; - result = nvme_tcp_try_send(queue); - if (result > 0) - pending = true; - else if (unlikely(result < 0)) - break; + if (mutex_trylock(&queue->send_mutex)) { + result = nvme_tcp_try_send(queue); + mutex_unlock(&queue->send_mutex); + if (result > 0) + pending = true; + else if (unlikely(result < 0)) + break; + } result = nvme_tcp_try_recv(queue); if (result > 0) @@ -1319,6 +1345,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, queue->ctrl = ctrl; INIT_LIST_HEAD(&queue->send_list); spin_lock_init(&queue->lock); + mutex_init(&queue->send_mutex); INIT_WORK(&queue->io_work, nvme_tcp_io_work); queue->queue_size = queue_size; @@ -1543,6 +1570,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; set->reserved_tags = 2; /* connect + keep-alive */ set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = 1; @@ -1554,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl, set->queue_depth = nctrl->sqsize + 1; set->reserved_tags = 1; /* fabric connect */ set->numa_node = NUMA_NO_NODE; - set->flags = BLK_MQ_F_SHOULD_MERGE; + set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; set->cmd_size = sizeof(struct nvme_tcp_request); set->driver_data = ctrl; set->nr_hw_queues = nctrl->queue_count - 1; @@ -2113,7 +2141,7 @@ static void nvme_tcp_submit_async_event(struct nvme_ctrl *arg) ctrl->async_req.curr_bio = NULL; ctrl->async_req.data_len = 0; - nvme_tcp_queue_request(&ctrl->async_req); + nvme_tcp_queue_request(&ctrl->async_req, true); } static enum blk_eh_timer_return @@ -2244,7 +2272,7 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); - nvme_tcp_queue_request(req); + nvme_tcp_queue_request(req, true); return BLK_STS_OK; } @@ -2302,9 +2330,11 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) return 0; + set_bit(NVME_TCP_Q_POLLING, &queue->flags); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) sk_busy_loop(sk, true); nvme_tcp_try_recv(queue); + clear_bit(NVME_TCP_Q_POLLING, &queue->flags); return queue->nr_cqe; } diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index d7f48c0fb311..4474952d64c6 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -4,6 +4,7 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 9d6f75cfa77c..1db8c0498668 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -295,7 +295,7 @@ out: static void nvmet_execute_get_log_page(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) + if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) return; switch (req->cmd->get_log_page.lid) { @@ -341,6 +341,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; + u32 cmd_capsule_size; u16 status = 0; id = kzalloc(sizeof(*id), GFP_KERNEL); @@ -433,9 +434,15 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); - /* Max command capsule size is sqe + single page of in-capsule data */ - id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + - req->port->inline_data_size) / 16); + /* + * Max command capsule size is sqe + in-capsule data size. + * Disable in-capsule data for Metadata capable controllers. + */ + cmd_capsule_size = sizeof(struct nvme_command); + if (!ctrl->pi_support) + cmd_capsule_size += req->port->inline_data_size; + id->ioccsz = cpu_to_le32(cmd_capsule_size / 16); + /* Max response capsule size is cqe */ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); @@ -465,6 +472,7 @@ out: static void nvmet_execute_identify_ns(struct nvmet_req *req) { + struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvmet_ns *ns; struct nvme_id_ns *id; u16 status = 0; @@ -482,10 +490,12 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) } /* return an all zeroed buffer if we can't find an active namespace */ - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); + ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); if (!ns) goto done; + nvmet_ns_revalidate(ns); + /* * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. @@ -521,6 +531,16 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) id->lbaf[0].ds = ns->blksize_shift; + if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { + id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | + NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | + NVME_NS_DPC_PI_TYPE3; + id->mc = NVME_MC_EXTENDED_LBA; + id->dps = ns->pi_type; + id->flbas = NVME_NS_FLBAS_META_EXT; + id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); + } + if (ns->readonly) id->nsattr |= (1 << 0); nvmet_put_namespace(ns); @@ -625,7 +645,7 @@ out: static void nvmet_execute_identify(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; switch (req->cmd->identify.cns) { @@ -654,7 +674,7 @@ static void nvmet_execute_identify(struct nvmet_req *req) */ static void nvmet_execute_abort(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; nvmet_set_result(req, 1); nvmet_req_complete(req, 0); @@ -743,7 +763,7 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u16 nsqr; u16 ncqr; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -815,7 +835,7 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; - if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10))) + if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10))) return; switch (cdw10 & 0xff) { @@ -882,7 +902,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; mutex_lock(&ctrl->lock); @@ -901,7 +921,7 @@ void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; pr_debug("ctrl %d update keep-alive timer for %d secs\n", diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 58cabd7b6fc5..419e0d4ce79b 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -20,61 +20,71 @@ static const struct config_item_type nvmet_subsys_type; static LIST_HEAD(nvmet_ports_list); struct list_head *nvmet_ports = &nvmet_ports_list; -static const struct nvmet_transport_name { +struct nvmet_type_name_map { u8 type; const char *name; -} nvmet_transport_names[] = { +}; + +static struct nvmet_type_name_map nvmet_transport[] = { { NVMF_TRTYPE_RDMA, "rdma" }, { NVMF_TRTYPE_FC, "fc" }, { NVMF_TRTYPE_TCP, "tcp" }, { NVMF_TRTYPE_LOOP, "loop" }, }; +static const struct nvmet_type_name_map nvmet_addr_family[] = { + { NVMF_ADDR_FAMILY_PCI, "pcie" }, + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, + { NVMF_ADDR_FAMILY_IB, "ib" }, + { NVMF_ADDR_FAMILY_FC, "fc" }, + { NVMF_ADDR_FAMILY_LOOP, "loop" }, +}; + +static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) +{ + if (p->enabled) + pr_err("Disable port '%u' before changing attribute in %s\n", + le16_to_cpu(p->disc_addr.portid), caller); + return p->enabled; +} + /* * nvmet_port Generic ConfigFS definitions. * Used in any place in the ConfigFS tree that refers to an address. */ -static ssize_t nvmet_addr_adrfam_show(struct config_item *item, - char *page) +static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.adrfam) { - case NVMF_ADDR_FAMILY_IP4: - return sprintf(page, "ipv4\n"); - case NVMF_ADDR_FAMILY_IP6: - return sprintf(page, "ipv6\n"); - case NVMF_ADDR_FAMILY_IB: - return sprintf(page, "ib\n"); - case NVMF_ADDR_FAMILY_FC: - return sprintf(page, "fc\n"); - default: - return sprintf(page, "\n"); + u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam; + int i; + + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (nvmet_addr_family[i].type == adrfam) + return sprintf(page, "%s\n", nvmet_addr_family[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_adrfam_store(struct config_item *item, const char *page, size_t count) { struct nvmet_port *port = to_nvmet_port(item); + int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } - if (sysfs_streq(page, "ipv4")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4; - } else if (sysfs_streq(page, "ipv6")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6; - } else if (sysfs_streq(page, "ib")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB; - } else if (sysfs_streq(page, "fc")) { - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC; - } else { - pr_err("Invalid value '%s' for adrfam\n", page); - return -EINVAL; + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { + if (sysfs_streq(page, nvmet_addr_family[i].name)) + goto found; } + pr_err("Invalid value '%s' for adrfam\n", page); + return -EINVAL; + +found: + port->disc_addr.adrfam = nvmet_addr_family[i].type; return count; } @@ -100,11 +110,9 @@ static ssize_t nvmet_addr_portid_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } + port->disc_addr.portid = cpu_to_le16(portid); return count; } @@ -130,11 +138,8 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1) return -EINVAL; @@ -143,20 +148,24 @@ static ssize_t nvmet_addr_traddr_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, addr_traddr); -static ssize_t nvmet_addr_treq_show(struct config_item *item, - char *page) +static const struct nvmet_type_name_map nvmet_addr_treq[] = { + { NVMF_TREQ_NOT_SPECIFIED, "not specified" }, + { NVMF_TREQ_REQUIRED, "required" }, + { NVMF_TREQ_NOT_REQUIRED, "not required" }, +}; + +static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) { - switch (to_nvmet_port(item)->disc_addr.treq & - NVME_TREQ_SECURE_CHANNEL_MASK) { - case NVMF_TREQ_NOT_SPECIFIED: - return sprintf(page, "not specified\n"); - case NVMF_TREQ_REQUIRED: - return sprintf(page, "required\n"); - case NVMF_TREQ_NOT_REQUIRED: - return sprintf(page, "not required\n"); - default: - return sprintf(page, "\n"); + u8 treq = to_nvmet_port(item)->disc_addr.treq & + NVME_TREQ_SECURE_CHANNEL_MASK; + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (treq == nvmet_addr_treq[i].type) + return sprintf(page, "%s\n", nvmet_addr_treq[i].name); } + + return sprintf(page, "\n"); } static ssize_t nvmet_addr_treq_store(struct config_item *item, @@ -164,25 +173,22 @@ static ssize_t nvmet_addr_treq_store(struct config_item *item, { struct nvmet_port *port = to_nvmet_port(item); u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; + int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } - if (sysfs_streq(page, "not specified")) { - treq |= NVMF_TREQ_NOT_SPECIFIED; - } else if (sysfs_streq(page, "required")) { - treq |= NVMF_TREQ_REQUIRED; - } else if (sysfs_streq(page, "not required")) { - treq |= NVMF_TREQ_NOT_REQUIRED; - } else { - pr_err("Invalid value '%s' for treq\n", page); - return -EINVAL; + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { + if (sysfs_streq(page, nvmet_addr_treq[i].name)) + goto found; } - port->disc_addr.treq = treq; + pr_err("Invalid value '%s' for treq\n", page); + return -EINVAL; + +found: + treq |= nvmet_addr_treq[i].type; + port->disc_addr.treq = treq; return count; } @@ -206,11 +212,8 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, pr_err("Invalid value '%s' for trsvcid\n", page); return -EINVAL; } - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1) return -EINVAL; @@ -233,11 +236,8 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int ret; - if (port->enabled) { - pr_err("Cannot modify inline_data_size while port enabled\n"); - pr_err("Disable the port before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } ret = kstrtoint(page, 0, &port->inline_data_size); if (ret) { pr_err("Invalid value '%s' for inline_data_size\n", page); @@ -248,16 +248,45 @@ static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, param_inline_data_size); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_param_pi_enable_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); +} + +static ssize_t nvmet_param_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (port->enabled) { + pr_err("Disable port before setting pi_enable value.\n"); + return -EACCES; + } + + port->pi_enable = val; + return count; +} + +CONFIGFS_ATTR(nvmet_, param_pi_enable); +#endif + static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { struct nvmet_port *port = to_nvmet_port(item); int i; - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (port->disc_addr.trtype != nvmet_transport_names[i].type) - continue; - return sprintf(page, "%s\n", nvmet_transport_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (port->disc_addr.trtype == nvmet_transport[i].type) + return sprintf(page, "%s\n", nvmet_transport[i].name); } return sprintf(page, "\n"); @@ -276,22 +305,20 @@ static ssize_t nvmet_addr_trtype_store(struct config_item *item, struct nvmet_port *port = to_nvmet_port(item); int i; - if (port->enabled) { - pr_err("Cannot modify address while enabled\n"); - pr_err("Disable the address before modifying\n"); + if (nvmet_is_port_enabled(port, __func__)) return -EACCES; - } - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { - if (sysfs_streq(page, nvmet_transport_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { + if (sysfs_streq(page, nvmet_transport[i].name)) goto found; } pr_err("Invalid value '%s' for trtype\n", page); return -EINVAL; + found: memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); - port->disc_addr.trtype = nvmet_transport_names[i].type; + port->disc_addr.trtype = nvmet_transport[i].type; if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) nvmet_port_init_tsas_rdma(port); return count; @@ -327,7 +354,7 @@ static ssize_t nvmet_ns_device_path_store(struct config_item *item, kfree(ns->device_path); ret = -ENOMEM; - ns->device_path = kstrndup(page, len, GFP_KERNEL); + ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); if (!ns->device_path) goto out_unlock; @@ -543,6 +570,31 @@ static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, CONFIGFS_ATTR(nvmet_ns_, buffered_io); +static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ns *ns = to_nvmet_ns(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + if (!val) + return -EINVAL; + + mutex_lock(&ns->subsys->lock); + if (!ns->enabled) { + pr_err("enable ns before revalidate.\n"); + mutex_unlock(&ns->subsys->lock); + return -EINVAL; + } + nvmet_ns_revalidate(ns); + mutex_unlock(&ns->subsys->lock); + return count; +} + +CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); + static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, @@ -550,6 +602,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_ana_grpid, &nvmet_ns_attr_enable, &nvmet_ns_attr_buffered_io, + &nvmet_ns_attr_revalidate_size, #ifdef CONFIG_PCI_P2PDMA &nvmet_ns_attr_p2pmem, #endif @@ -963,7 +1016,7 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, return -EINVAL; } - new_model_number = kstrndup(page, len, GFP_KERNEL); + new_model_number = kmemdup_nul(page, len, GFP_KERNEL); if (!new_model_number) return -ENOMEM; @@ -987,6 +1040,28 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item, } CONFIGFS_ATTR(nvmet_subsys_, attr_model); +#ifdef CONFIG_BLK_DEV_INTEGRITY +static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, + char *page) +{ + return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); +} + +static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_subsys *subsys = to_subsys(item); + bool pi_enable; + + if (strtobool(page, &pi_enable)) + return -EINVAL; + + subsys->pi_support = pi_enable; + return count; +} +CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); +#endif + static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_allow_any_host, &nvmet_subsys_attr_attr_version, @@ -994,6 +1069,9 @@ static struct configfs_attribute *nvmet_subsys_attrs[] = { &nvmet_subsys_attr_attr_cntlid_min, &nvmet_subsys_attr_attr_cntlid_max, &nvmet_subsys_attr_attr_model, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_subsys_attr_attr_pi_enable, +#endif NULL, }; @@ -1149,10 +1227,7 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; -static struct { - enum nvme_ana_state state; - const char *name; -} nvmet_ana_state_names[] = { +static struct nvmet_type_name_map nvmet_ana_state[] = { { NVME_ANA_OPTIMIZED, "optimized" }, { NVME_ANA_NONOPTIMIZED, "non-optimized" }, { NVME_ANA_INACCESSIBLE, "inaccessible" }, @@ -1167,10 +1242,9 @@ static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item, enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (state != nvmet_ana_state_names[i].state) - continue; - return sprintf(page, "%s\n", nvmet_ana_state_names[i].name); + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (state == nvmet_ana_state[i].type) + return sprintf(page, "%s\n", nvmet_ana_state[i].name); } return sprintf(page, "\n"); @@ -1180,10 +1254,11 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, const char *page, size_t count) { struct nvmet_ana_group *grp = to_ana_group(item); + enum nvme_ana_state *ana_state = grp->port->ana_state; int i; - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { - if (sysfs_streq(page, nvmet_ana_state_names[i].name)) + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { + if (sysfs_streq(page, nvmet_ana_state[i].name)) goto found; } @@ -1192,10 +1267,9 @@ static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, found: down_write(&nvmet_ana_sem); - grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state; + ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type; nvmet_ana_chgcnt++; up_write(&nvmet_ana_sem); - nvmet_port_send_ana_event(grp->port); return count; } @@ -1297,6 +1371,9 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, &nvmet_attr_param_inline_data_size, +#ifdef CONFIG_BLK_DEV_INTEGRITY + &nvmet_attr_param_pi_enable, +#endif NULL, }; @@ -1346,6 +1423,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); + port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; config_group_init_type_name(&port->group, name, &nvmet_port_type); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b685f99d56a1..6392bcd30bd7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -134,15 +134,10 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) struct nvmet_async_event *aen; struct nvmet_req *req; - while (1) { - mutex_lock(&ctrl->lock); - aen = list_first_entry_or_null(&ctrl->async_events, - struct nvmet_async_event, entry); - if (!aen || !ctrl->nr_async_event_cmds) { - mutex_unlock(&ctrl->lock); - break; - } - + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) { + aen = list_first_entry(&ctrl->async_events, + struct nvmet_async_event, entry); req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; if (status == 0) nvmet_set_result(req, nvmet_async_event_result(aen)); @@ -151,20 +146,21 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) kfree(aen); mutex_unlock(&ctrl->lock); + trace_nvmet_async_event(ctrl, req->cqe->result.u32); nvmet_req_complete(req, status); + mutex_lock(&ctrl->lock); } + mutex_unlock(&ctrl->lock); } static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) { - struct nvmet_req *req; + struct nvmet_async_event *aen, *tmp; mutex_lock(&ctrl->lock); - while (ctrl->nr_async_event_cmds) { - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - mutex_unlock(&ctrl->lock); - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); - mutex_lock(&ctrl->lock); + list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { + list_del(&aen->entry); + kfree(aen); } mutex_unlock(&ctrl->lock); } @@ -322,12 +318,21 @@ int nvmet_enable_port(struct nvmet_port *port) if (!try_module_get(ops->owner)) return -EINVAL; - ret = ops->add_port(port); - if (ret) { - module_put(ops->owner); - return ret; + /* + * If the user requested PI support and the transport isn't pi capable, + * don't enable the port. + */ + if (port->pi_enable && !ops->metadata_support) { + pr_err("T10-PI is not supported by transport type %d\n", + port->disc_addr.trtype); + ret = -EINVAL; + goto out_put; } + ret = ops->add_port(port); + if (ret) + goto out_put; + /* If the transport didn't set inline_data_size, then disable it. */ if (port->inline_data_size < 0) port->inline_data_size = 0; @@ -335,6 +340,10 @@ int nvmet_enable_port(struct nvmet_port *port) port->enabled = true; port->tr_ops = ops; return 0; + +out_put: + module_put(ops->owner); + return ret; } void nvmet_disable_port(struct nvmet_port *port) @@ -514,6 +523,19 @@ static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl, ns->nsid); } +void nvmet_ns_revalidate(struct nvmet_ns *ns) +{ + loff_t oldsize = ns->size; + + if (ns->bdev) + nvmet_bdev_ns_revalidate(ns); + else + nvmet_file_ns_revalidate(ns); + + if (oldsize != ns->size) + nvmet_ns_changed(ns->subsys, ns->nsid); +} + int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; @@ -764,10 +786,8 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * If this is the admin queue, complete all AERs so that our * queue doesn't have outstanding requests on it. */ - if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) nvmet_async_events_process(ctrl, status); - nvmet_async_events_free(ctrl); - } percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); @@ -873,8 +893,11 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sq = sq; req->ops = ops; req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; req->transfer_len = 0; + req->metadata_len = 0; req->cqe->status = 0; req->cqe->sq_head = 0; req->ns = NULL; @@ -936,9 +959,9 @@ void nvmet_req_uninit(struct nvmet_req *req) } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) { - if (unlikely(data_len != req->transfer_len)) { + if (unlikely(len != req->transfer_len)) { req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); return false; @@ -946,7 +969,7 @@ bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) return true; } -EXPORT_SYMBOL_GPL(nvmet_check_data_len); +EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) { @@ -959,50 +982,90 @@ bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) return true; } -int nvmet_req_alloc_sgl(struct nvmet_req *req) +static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) { - struct pci_dev *p2p_dev = NULL; + return req->transfer_len - req->metadata_len; +} - if (IS_ENABLED(CONFIG_PCI_P2PDMA)) { - if (req->sq->ctrl && req->ns) - p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, - req->ns->nsid); +static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req) +{ + req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt, + nvmet_data_transfer_len(req)); + if (!req->sg) + goto out_err; - req->p2p_dev = NULL; - if (req->sq->qid && p2p_dev) { - req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, - req->transfer_len); - if (req->sg) { - req->p2p_dev = p2p_dev; - return 0; - } - } + if (req->metadata_len) { + req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev, + &req->metadata_sg_cnt, req->metadata_len); + if (!req->metadata_sg) + goto out_free_sg; + } + return 0; +out_free_sg: + pci_p2pmem_free_sgl(req->p2p_dev, req->sg); +out_err: + return -ENOMEM; +} - /* - * If no P2P memory was available we fallback to using - * regular memory - */ +static bool nvmet_req_find_p2p_dev(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_PCI_P2PDMA)) + return false; + + if (req->sq->ctrl && req->sq->qid && req->ns) { + req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, + req->ns->nsid); + if (req->p2p_dev) + return true; } - req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); + req->p2p_dev = NULL; + return false; +} + +int nvmet_req_alloc_sgls(struct nvmet_req *req) +{ + if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req)) + return 0; + + req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, + &req->sg_cnt); if (unlikely(!req->sg)) - return -ENOMEM; + goto out; + + if (req->metadata_len) { + req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, + &req->metadata_sg_cnt); + if (unlikely(!req->metadata_sg)) + goto out_free; + } return 0; +out_free: + sgl_free(req->sg); +out: + return -ENOMEM; } -EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl); +EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); -void nvmet_req_free_sgl(struct nvmet_req *req) +void nvmet_req_free_sgls(struct nvmet_req *req) { - if (req->p2p_dev) + if (req->p2p_dev) { pci_p2pmem_free_sgl(req->p2p_dev, req->sg); - else + if (req->metadata_sg) + pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); + } else { sgl_free(req->sg); + if (req->metadata_sg) + sgl_free(req->metadata_sg); + } req->sg = NULL; + req->metadata_sg = NULL; req->sg_cnt = 0; + req->metadata_sg_cnt = 0; } -EXPORT_SYMBOL_GPL(nvmet_req_free_sgl); +EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); static inline bool nvmet_cc_en(u32 cc) { @@ -1357,6 +1420,7 @@ static void nvmet_ctrl_free(struct kref *ref) ida_simple_remove(&cntlid_ida, ctrl->cntlid); + nvmet_async_events_free(ctrl); kfree(ctrl->sqs); kfree(ctrl->cqs); kfree(ctrl->changed_ns_list); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 0c2274b21e15..40cf0b6e6c9d 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -171,7 +171,7 @@ static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) u16 status = 0; void *buffer; - if (!nvmet_check_data_len(req, data_len)) + if (!nvmet_check_transfer_len(req, data_len)) return; if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { @@ -244,7 +244,7 @@ static void nvmet_execute_disc_identify(struct nvmet_req *req) const char model[] = "Linux"; u16 status = 0; - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) return; if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { @@ -298,7 +298,7 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { @@ -324,7 +324,7 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; switch (cdw10 & 0xff) { diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index feef15c38ec9..42bd12b8bf00 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -12,7 +12,7 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_set.attrib & 1) { @@ -41,7 +41,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) u16 status = 0; u64 val = 0; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; if (req->cmd->prop_get.attrib & 1) { @@ -156,7 +156,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); @@ -197,6 +197,8 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; + uuid_copy(&ctrl->hostid, &d->hostid); status = nvmet_install_queue(ctrl, req); @@ -205,8 +207,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; } - pr_info("creating controller %d for subsystem %s for NQN %s.\n", - ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); + pr_info("creating controller %d for subsystem %s for NQN %s%s.\n", + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, + ctrl->pi_support ? " T10-PI is enabled" : ""); req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); out: @@ -223,7 +226,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) return; d = kmalloc(sizeof(*d), GFP_KERNEL); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index a8ceb7721640..27fd3b5aa621 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -14,6 +14,7 @@ #include "nvmet.h" #include <linux/nvme-fc-driver.h> #include <linux/nvme-fc.h> +#include "../host/fc.h" /* *************************** Data Structures/Defines ****************** */ @@ -21,23 +22,21 @@ #define NVMET_LS_CTX_COUNT 256 -/* for this implementation, assume small single frame rqst/rsp */ -#define NVME_FC_MAX_LS_BUFFER_SIZE 2048 - struct nvmet_fc_tgtport; struct nvmet_fc_tgt_assoc; -struct nvmet_fc_ls_iod { - struct nvmefc_tgt_ls_req *lsreq; +struct nvmet_fc_ls_iod { /* for an LS RQST RCV */ + struct nvmefc_ls_rsp *lsrsp; struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ - struct list_head ls_list; /* tgtport->ls_list */ + struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */ struct nvmet_fc_tgtport *tgtport; struct nvmet_fc_tgt_assoc *assoc; + void *hosthandle; - u8 *rqstbuf; - u8 *rspbuf; + union nvmefc_ls_requests *rqstbuf; + union nvmefc_ls_responses *rspbuf; u16 rqstdatalen; dma_addr_t rspdma; @@ -46,6 +45,18 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */ + struct nvmefc_ls_req ls_req; + + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + + int ls_error; + struct list_head lsreq_list; /* tgtport->ls_req_list */ + bool req_queued; +}; + + /* desired maximum for a single sequence - if sg list allows it */ #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) @@ -83,7 +94,6 @@ struct nvmet_fc_fcp_iod { }; struct nvmet_fc_tgtport { - struct nvmet_fc_target_port fc_target_port; struct list_head tgt_list; /* nvmet_fc_target_list */ @@ -92,9 +102,11 @@ struct nvmet_fc_tgtport { struct nvmet_fc_ls_iod *iod; spinlock_t lock; - struct list_head ls_list; + struct list_head ls_rcv_list; + struct list_head ls_req_list; struct list_head ls_busylist; struct list_head assoc_list; + struct list_head host_list; struct ida assoc_cnt; struct nvmet_fc_port_entry *pe; struct kref ref; @@ -136,14 +148,26 @@ struct nvmet_fc_tgt_queue { struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ } __aligned(sizeof(unsigned long long)); +struct nvmet_fc_hostport { + struct nvmet_fc_tgtport *tgtport; + void *hosthandle; + struct list_head host_list; + struct kref ref; + u8 invalid; +}; + struct nvmet_fc_tgt_assoc { u64 association_id; u32 a_id; + atomic_t terminating; struct nvmet_fc_tgtport *tgtport; + struct nvmet_fc_hostport *hostport; + struct nvmet_fc_ls_iod *rcv_disconn; struct list_head a_list; struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; struct kref ref; struct work_struct del_work; + atomic_t del_work_active; }; @@ -227,6 +251,8 @@ static int nvmet_fc_tgtport_get(struct nvmet_fc_tgtport *tgtport); static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod); static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); +static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_iod *iod); /* *********************** FC-NVME DMA Handling **************************** */ @@ -318,6 +344,188 @@ fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, } +/* ********************** FC-NVME LS XMT Handling ************************* */ + + +static void +__nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) +{ + struct nvmet_fc_tgtport *tgtport = lsop->tgtport; + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + + if (!lsop->req_queued) { + spin_unlock_irqrestore(&tgtport->lock, flags); + return; + } + + list_del(&lsop->lsreq_list); + + lsop->req_queued = false; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); + + nvmet_fc_tgtport_put(tgtport); +} + +static int +__nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + struct nvmefc_ls_req *lsreq = &lsop->ls_req; + unsigned long flags; + int ret = 0; + + if (!tgtport->ops->ls_req) + return -EOPNOTSUPP; + + if (!nvmet_fc_tgtport_get(tgtport)) + return -ESHUTDOWN; + + lsreq->done = done; + lsop->req_queued = false; + INIT_LIST_HEAD(&lsop->lsreq_list); + + lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr, + lsreq->rqstlen + lsreq->rsplen, + DMA_BIDIRECTIONAL); + if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) { + ret = -EFAULT; + goto out_puttgtport; + } + lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; + + spin_lock_irqsave(&tgtport->lock, flags); + + list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list); + + lsop->req_queued = true; + + spin_unlock_irqrestore(&tgtport->lock, flags); + + ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle, + lsreq); + if (ret) + goto out_unlink; + + return 0; + +out_unlink: + lsop->ls_error = ret; + spin_lock_irqsave(&tgtport->lock, flags); + lsop->req_queued = false; + list_del(&lsop->lsreq_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, + (lsreq->rqstlen + lsreq->rsplen), + DMA_BIDIRECTIONAL); +out_puttgtport: + nvmet_fc_tgtport_put(tgtport); + + return ret; +} + +static int +nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport, + struct nvmet_fc_ls_req_op *lsop, + void (*done)(struct nvmefc_ls_req *req, int status)) +{ + /* don't wait for completion */ + + return __nvmet_fc_send_ls_req(tgtport, lsop, done); +} + +static void +nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) +{ + struct nvmet_fc_ls_req_op *lsop = + container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req); + + __nvmet_fc_finish_ls_req(lsop); + + /* fc-nvme target doesn't care about success or failure of cmd */ + + kfree(lsop); +} + +/* + * This routine sends a FC-NVME LS to disconnect (aka terminate) + * the FC-NVME Association. Terminating the association also + * terminates the FC-NVME connections (per queue, both admin and io + * queues) that are part of the association. E.g. things are torn + * down, and the related FC-NVME Association ID and Connection IDs + * become invalid. + * + * The behavior of the fc-nvme target is such that it's + * understanding of the association and connections will implicitly + * be torn down. The action is implicit as it may be due to a loss of + * connectivity with the fc-nvme host, so the target may never get a + * response even if it tried. As such, the action of this routine + * is to asynchronously send the LS, ignore any results of the LS, and + * continue on with terminating the association. If the fc-nvme host + * is present and receives the LS, it too can tear down. + */ +static void +nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) +{ + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; + struct nvmet_fc_ls_req_op *lsop; + struct nvmefc_ls_req *lsreq; + int ret; + + /* + * If ls_req is NULL or no hosthandle, it's an older lldd and no + * message is normal. Otherwise, send unless the hostport has + * already been invalidated by the lldd. + */ + if (!tgtport->ops->ls_req || !assoc->hostport || + assoc->hostport->invalid) + return; + + lsop = kzalloc((sizeof(*lsop) + + sizeof(*discon_rqst) + sizeof(*discon_acc) + + tgtport->ops->lsrqst_priv_sz), GFP_KERNEL); + if (!lsop) { + dev_info(tgtport->dev, + "{%d:%d} send Disconnect Association failed: ENOMEM\n", + tgtport->fc_target_port.port_num, assoc->a_id); + return; + } + + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; + lsreq = &lsop->ls_req; + if (tgtport->ops->lsrqst_priv_sz) + lsreq->private = (void *)&discon_acc[1]; + else + lsreq->private = NULL; + + lsop->tgtport = tgtport; + lsop->hosthandle = assoc->hostport->hosthandle; + + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, + assoc->association_id); + + ret = nvmet_fc_send_ls_req_async(tgtport, lsop, + nvmet_fc_disconnect_assoc_done); + if (ret) { + dev_info(tgtport->dev, + "{%d:%d} XMT Disconnect Association failed: %d\n", + tgtport->fc_target_port.port_num, assoc->a_id, ret); + kfree(lsop); + } +} + + /* *********************** FC-NVME Port Management ************************ */ @@ -337,17 +545,18 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work); iod->tgtport = tgtport; - list_add_tail(&iod->ls_list, &tgtport->ls_list); + list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list); - iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE, - GFP_KERNEL); + iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + + sizeof(union nvmefc_ls_responses), + GFP_KERNEL); if (!iod->rqstbuf) goto out_fail; - iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE; + iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1]; iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, + sizeof(*iod->rspbuf), DMA_TO_DEVICE); if (fc_dma_mapping_error(tgtport->dev, iod->rspdma)) goto out_fail; @@ -357,12 +566,12 @@ nvmet_fc_alloc_ls_iodlist(struct nvmet_fc_tgtport *tgtport) out_fail: kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); for (iod--, i--; i >= 0; iod--, i--) { fc_dma_unmap_single(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(iod); @@ -378,10 +587,10 @@ nvmet_fc_free_ls_iodlist(struct nvmet_fc_tgtport *tgtport) for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { fc_dma_unmap_single(tgtport->dev, - iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, + iod->rspdma, sizeof(*iod->rspbuf), DMA_TO_DEVICE); kfree(iod->rqstbuf); - list_del(&iod->ls_list); + list_del(&iod->ls_rcv_list); } kfree(tgtport->iod); } @@ -393,10 +602,10 @@ nvmet_fc_alloc_ls_iod(struct nvmet_fc_tgtport *tgtport) unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - iod = list_first_entry_or_null(&tgtport->ls_list, - struct nvmet_fc_ls_iod, ls_list); + iod = list_first_entry_or_null(&tgtport->ls_rcv_list, + struct nvmet_fc_ls_iod, ls_rcv_list); if (iod) - list_move_tail(&iod->ls_list, &tgtport->ls_busylist); + list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist); spin_unlock_irqrestore(&tgtport->lock, flags); return iod; } @@ -409,7 +618,7 @@ nvmet_fc_free_ls_iod(struct nvmet_fc_tgtport *tgtport, unsigned long flags; spin_lock_irqsave(&tgtport->lock, flags); - list_move(&iod->ls_list, &tgtport->ls_list); + list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list); spin_unlock_irqrestore(&tgtport->lock, flags); } @@ -678,31 +887,33 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) struct nvmet_fc_fcp_iod *fod = queue->fod; struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; unsigned long flags; - int i, writedataactive; + int i; bool disconnect; disconnect = atomic_xchg(&queue->connected, 0); + /* if not connected, nothing to do */ + if (!disconnect) + return; + spin_lock_irqsave(&queue->qlock, flags); /* abort outstanding io's */ for (i = 0; i < queue->sqsize; fod++, i++) { if (fod->active) { spin_lock(&fod->flock); fod->abort = true; - writedataactive = fod->writedataactive; - spin_unlock(&fod->flock); /* * only call lldd abort routine if waiting for * writedata. other outstanding ops should finish * on their own. */ - if (writedataactive) { - spin_lock(&fod->flock); + if (fod->writedataactive) { fod->aborted = true; spin_unlock(&fod->flock); tgtport->ops->fcp_abort( &tgtport->fc_target_port, fod->fcpreq); - } + } else + spin_unlock(&fod->flock); } } @@ -742,8 +953,7 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) flush_workqueue(queue->work_q); - if (disconnect) - nvmet_sq_destroy(&queue->nvme_sq); + nvmet_sq_destroy(&queue->nvme_sq); nvmet_fc_tgt_q_put(queue); } @@ -778,17 +988,114 @@ nvmet_fc_find_target_queue(struct nvmet_fc_tgtport *tgtport, } static void +nvmet_fc_hostport_free(struct kref *ref) +{ + struct nvmet_fc_hostport *hostport = + container_of(ref, struct nvmet_fc_hostport, ref); + struct nvmet_fc_tgtport *tgtport = hostport->tgtport; + unsigned long flags; + + spin_lock_irqsave(&tgtport->lock, flags); + list_del(&hostport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + if (tgtport->ops->host_release && hostport->invalid) + tgtport->ops->host_release(hostport->hosthandle); + kfree(hostport); + nvmet_fc_tgtport_put(tgtport); +} + +static void +nvmet_fc_hostport_put(struct nvmet_fc_hostport *hostport) +{ + kref_put(&hostport->ref, nvmet_fc_hostport_free); +} + +static int +nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) +{ + return kref_get_unless_zero(&hostport->ref); +} + +static void +nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) +{ + /* if LLDD not implemented, leave as NULL */ + if (!hostport->hosthandle) + return; + + nvmet_fc_hostport_put(hostport); +} + +static struct nvmet_fc_hostport * +nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) +{ + struct nvmet_fc_hostport *newhost, *host, *match = NULL; + unsigned long flags; + + /* if LLDD not implemented, leave as NULL */ + if (!hosthandle) + return NULL; + + /* take reference for what will be the newly allocated hostport */ + if (!nvmet_fc_tgtport_get(tgtport)) + return ERR_PTR(-EINVAL); + + newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); + if (!newhost) { + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); + /* no allocation - release reference */ + nvmet_fc_tgtport_put(tgtport); + return (match) ? match : ERR_PTR(-ENOMEM); + } + + newhost->tgtport = tgtport; + newhost->hosthandle = hosthandle; + INIT_LIST_HEAD(&newhost->host_list); + kref_init(&newhost->ref); + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry(host, &tgtport->host_list, host_list) { + if (host->hosthandle == hosthandle && !host->invalid) { + if (nvmet_fc_hostport_get(host)) { + match = host; + break; + } + } + } + if (match) { + kfree(newhost); + newhost = NULL; + /* releasing allocation - release reference */ + nvmet_fc_tgtport_put(tgtport); + } else + list_add_tail(&newhost->host_list, &tgtport->host_list); + spin_unlock_irqrestore(&tgtport->lock, flags); + + return (match) ? match : newhost; +} + +static void nvmet_fc_delete_assoc(struct work_struct *work) { struct nvmet_fc_tgt_assoc *assoc = container_of(work, struct nvmet_fc_tgt_assoc, del_work); nvmet_fc_delete_target_assoc(assoc); + atomic_set(&assoc->del_work_active, 0); nvmet_fc_tgt_a_put(assoc); } static struct nvmet_fc_tgt_assoc * -nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) +nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) { struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; unsigned long flags; @@ -805,13 +1112,19 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) goto out_free_assoc; if (!nvmet_fc_tgtport_get(tgtport)) - goto out_ida_put; + goto out_ida; + + assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); + if (IS_ERR(assoc->hostport)) + goto out_put; assoc->tgtport = tgtport; assoc->a_id = idx; INIT_LIST_HEAD(&assoc->a_list); kref_init(&assoc->ref); INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); + atomic_set(&assoc->del_work_active, 0); + atomic_set(&assoc->terminating, 0); while (needrandom) { get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); @@ -819,11 +1132,12 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) spin_lock_irqsave(&tgtport->lock, flags); needrandom = false; - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) + list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { if (ran == tmpassoc->association_id) { needrandom = true; break; } + } if (!needrandom) { assoc->association_id = ran; list_add_tail(&assoc->a_list, &tgtport->assoc_list); @@ -833,7 +1147,9 @@ nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) return assoc; -out_ida_put: +out_put: + nvmet_fc_tgtport_put(tgtport); +out_ida: ida_simple_remove(&tgtport->assoc_cnt, idx); out_free_assoc: kfree(assoc); @@ -846,12 +1162,24 @@ nvmet_fc_target_assoc_free(struct kref *ref) struct nvmet_fc_tgt_assoc *assoc = container_of(ref, struct nvmet_fc_tgt_assoc, ref); struct nvmet_fc_tgtport *tgtport = assoc->tgtport; + struct nvmet_fc_ls_iod *oldls; unsigned long flags; + /* Send Disconnect now that all i/o has completed */ + nvmet_fc_xmt_disconnect_assoc(assoc); + + nvmet_fc_free_hostport(assoc->hostport); spin_lock_irqsave(&tgtport->lock, flags); list_del(&assoc->a_list); + oldls = assoc->rcv_disconn; spin_unlock_irqrestore(&tgtport->lock, flags); + /* if pending Rcv Disconnect Association LS, send rsp now */ + if (oldls) + nvmet_fc_xmt_ls_rsp(tgtport, oldls); ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); + dev_info(tgtport->dev, + "{%d:%d} Association freed\n", + tgtport->fc_target_port.port_num, assoc->a_id); kfree(assoc); nvmet_fc_tgtport_put(tgtport); } @@ -874,7 +1202,13 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) struct nvmet_fc_tgtport *tgtport = assoc->tgtport; struct nvmet_fc_tgt_queue *queue; unsigned long flags; - int i; + int i, terminating; + + terminating = atomic_xchg(&assoc->terminating, 1); + + /* if already terminating, do nothing */ + if (terminating) + return; spin_lock_irqsave(&tgtport->lock, flags); for (i = NVMET_NR_QUEUES; i >= 0; i--) { @@ -890,6 +1224,10 @@ nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc) } spin_unlock_irqrestore(&tgtport->lock, flags); + dev_info(tgtport->dev, + "{%d:%d} Association deleted\n", + tgtport->fc_target_port.port_num, assoc->a_id); + nvmet_fc_tgt_a_put(assoc); } @@ -1048,16 +1386,21 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, newrec->fc_target_port.node_name = pinfo->node_name; newrec->fc_target_port.port_name = pinfo->port_name; - newrec->fc_target_port.private = &newrec[1]; + if (template->target_priv_sz) + newrec->fc_target_port.private = &newrec[1]; + else + newrec->fc_target_port.private = NULL; newrec->fc_target_port.port_id = pinfo->port_id; newrec->fc_target_port.port_num = idx; INIT_LIST_HEAD(&newrec->tgt_list); newrec->dev = dev; newrec->ops = template; spin_lock_init(&newrec->lock); - INIT_LIST_HEAD(&newrec->ls_list); + INIT_LIST_HEAD(&newrec->ls_rcv_list); + INIT_LIST_HEAD(&newrec->ls_req_list); INIT_LIST_HEAD(&newrec->ls_busylist); INIT_LIST_HEAD(&newrec->assoc_list); + INIT_LIST_HEAD(&newrec->host_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); newrec->max_sg_cnt = template->max_sgl_segments; @@ -1134,17 +1477,90 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) { struct nvmet_fc_tgt_assoc *assoc, *next; unsigned long flags; + int ret; + + spin_lock_irqsave(&tgtport->lock, flags); + list_for_each_entry_safe(assoc, next, + &tgtport->assoc_list, a_list) { + if (!nvmet_fc_tgt_a_get(assoc)) + continue; + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ + nvmet_fc_tgt_a_put(assoc); + } + } + spin_unlock_irqrestore(&tgtport->lock, flags); +} + +/** + * nvmet_fc_invalidate_host - transport entry point called by an LLDD + * to remove references to a hosthandle for LS's. + * + * The nvmet-fc layer ensures that any references to the hosthandle + * on the targetport are forgotten (set to NULL). The LLDD will + * typically call this when a login with a remote host port has been + * lost, thus LS's for the remote host port are no longer possible. + * + * If an LS request is outstanding to the targetport/hosthandle (or + * issued concurrently with the call to invalidate the host), the + * LLDD is responsible for terminating/aborting the LS and completing + * the LS request. It is recommended that these terminations/aborts + * occur after calling to invalidate the host handle to avoid additional + * retries by the nvmet-fc transport. The nvmet-fc transport may + * continue to reference host handle while it cleans up outstanding + * NVME associations. The nvmet-fc transport will call the + * ops->host_release() callback to notify the LLDD that all references + * are complete and the related host handle can be recovered. + * Note: if there are no references, the callback may be called before + * the invalidate host call returns. + * + * @target_port: pointer to the (registered) target port that a prior + * LS was received on and which supplied the transport the + * hosthandle. + * @hosthandle: the handle (pointer) that represents the host port + * that no longer has connectivity and that LS's should + * no longer be directed to. + */ +void +nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, + void *hosthandle) +{ + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); + struct nvmet_fc_tgt_assoc *assoc, *next; + unsigned long flags; + bool noassoc = true; + int ret; spin_lock_irqsave(&tgtport->lock, flags); list_for_each_entry_safe(assoc, next, &tgtport->assoc_list, a_list) { + if (!assoc->hostport || + assoc->hostport->hosthandle != hosthandle) + continue; if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + assoc->hostport->invalid = 1; + noassoc = false; + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } } spin_unlock_irqrestore(&tgtport->lock, flags); + + /* if there's nothing to wait for - call the callback */ + if (noassoc && tgtport->ops->host_release) + tgtport->ops->host_release(hosthandle); } +EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); /* * nvmet layer has called to terminate an association @@ -1157,6 +1573,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) struct nvmet_fc_tgt_queue *queue; unsigned long flags; bool found_ctrl = false; + int ret; /* this is a bit ugly, but don't want to make locks layered */ spin_lock_irqsave(&nvmet_fc_tgtlock, flags); @@ -1180,8 +1597,14 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); + if (ret == 0) { + if (!schedule_work(&assoc->del_work)) + nvmet_fc_tgt_a_put(assoc); + } else { + /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); + } return; } @@ -1211,6 +1634,13 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) /* terminate any outstanding associations */ __nvmet_fc_free_assocs(tgtport); + /* + * should terminate LS's as well. However, LS's will be generated + * at the tail end of association termination, so they likely don't + * exist yet. And even if they did, it's worthwhile to just let + * them finish and targetport ref counting will clean things up. + */ + nvmet_fc_tgtport_put(tgtport); return 0; @@ -1218,113 +1648,15 @@ nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *target_port) EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); -/* *********************** FC-NVME LS Handling **************************** */ - - -static void -nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) -{ - struct fcnvme_ls_acc_hdr *acc = buf; - - acc->w0.ls_cmd = ls_cmd; - acc->desc_list_len = desc_len; - acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); - acc->rqst.desc_len = - fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); - acc->rqst.w0.ls_cmd = rqst_ls_cmd; -} +/* ********************** FC-NVME LS RCV Handling ************************* */ -static int -nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, - u8 reason, u8 explanation, u8 vendor) -{ - struct fcnvme_ls_rjt *rjt = buf; - - nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, - fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), - ls_cmd); - rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); - rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); - rjt->rjt.reason_code = reason; - rjt->rjt.reason_explanation = explanation; - rjt->rjt.vendor = vendor; - - return sizeof(struct fcnvme_ls_rjt); -} - -/* Validation Error indexes into the string table below */ -enum { - VERR_NO_ERROR = 0, - VERR_CR_ASSOC_LEN = 1, - VERR_CR_ASSOC_RQST_LEN = 2, - VERR_CR_ASSOC_CMD = 3, - VERR_CR_ASSOC_CMD_LEN = 4, - VERR_ERSP_RATIO = 5, - VERR_ASSOC_ALLOC_FAIL = 6, - VERR_QUEUE_ALLOC_FAIL = 7, - VERR_CR_CONN_LEN = 8, - VERR_CR_CONN_RQST_LEN = 9, - VERR_ASSOC_ID = 10, - VERR_ASSOC_ID_LEN = 11, - VERR_NO_ASSOC = 12, - VERR_CONN_ID = 13, - VERR_CONN_ID_LEN = 14, - VERR_NO_CONN = 15, - VERR_CR_CONN_CMD = 16, - VERR_CR_CONN_CMD_LEN = 17, - VERR_DISCONN_LEN = 18, - VERR_DISCONN_RQST_LEN = 19, - VERR_DISCONN_CMD = 20, - VERR_DISCONN_CMD_LEN = 21, - VERR_DISCONN_SCOPE = 22, - VERR_RS_LEN = 23, - VERR_RS_RQST_LEN = 24, - VERR_RS_CMD = 25, - VERR_RS_CMD_LEN = 26, - VERR_RS_RCTL = 27, - VERR_RS_RO = 28, -}; - -static char *validation_errors[] = { - "OK", - "Bad CR_ASSOC Length", - "Bad CR_ASSOC Rqst Length", - "Not CR_ASSOC Cmd", - "Bad CR_ASSOC Cmd Length", - "Bad Ersp Ratio", - "Association Allocation Failed", - "Queue Allocation Failed", - "Bad CR_CONN Length", - "Bad CR_CONN Rqst Length", - "Not Association ID", - "Bad Association ID Length", - "No Association", - "Not Connection ID", - "Bad Connection ID Length", - "No Connection", - "Not CR_CONN Cmd", - "Bad CR_CONN Cmd Length", - "Bad DISCONN Length", - "Bad DISCONN Rqst Length", - "Not DISCONN Cmd", - "Bad DISCONN Cmd Length", - "Bad Disconnect Scope", - "Bad RS Length", - "Bad RS Rqst Length", - "Not RS Cmd", - "Bad RS Cmd Length", - "Bad RS R_CTL", - "Bad RS Relative Offset", -}; static void nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_assoc_rqst *rqst = - (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_assoc_acc *acc = - (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf; + struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc; + struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1356,7 +1688,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, else { /* new association w/ admin queue */ - iod->assoc = nvmet_fc_alloc_target_assoc(tgtport); + iod->assoc = nvmet_fc_alloc_target_assoc( + tgtport, iod->hosthandle); if (!iod->assoc) ret = VERR_ASSOC_ALLOC_FAIL; else { @@ -1371,8 +1704,8 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Association LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); return; @@ -1382,11 +1715,15 @@ nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, atomic_set(&queue->connected, 1); queue->sqhd = 0; /* best place to init value */ + dev_info(tgtport->dev, + "{%d:%d} Association created\n", + tgtport->fc_target_port.port_num, iod->assoc->a_id); + /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_cr_assoc_acc)), FCNVME_LS_CREATE_ASSOCIATION); @@ -1407,10 +1744,8 @@ static void nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_cr_conn_rqst *rqst = - (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf; - struct fcnvme_ls_cr_conn_acc *acc = - (struct fcnvme_ls_cr_conn_acc *)iod->rspbuf; + struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn; + struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn; struct nvmet_fc_tgt_queue *queue; int ret = 0; @@ -1462,8 +1797,8 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, dev_err(tgtport->dev, "Create Connection LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : FCNVME_RJT_RC_LOGIC, @@ -1477,9 +1812,9 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), FCNVME_LS_CREATE_CONNECTION); acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); @@ -1491,46 +1826,28 @@ nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, be16_to_cpu(rqst->connect_cmd.qid))); } -static void +/* + * Returns true if the LS response is to be transmit + * Returns false if the LS response is to be delayed + */ +static int nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { struct fcnvme_ls_disconnect_assoc_rqst *rqst = - (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf; + &iod->rqstbuf->rq_dis_assoc; struct fcnvme_ls_disconnect_assoc_acc *acc = - (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf; - struct nvmet_fc_tgt_assoc *assoc; + &iod->rspbuf->rsp_dis_assoc; + struct nvmet_fc_tgt_assoc *assoc = NULL; + struct nvmet_fc_ls_iod *oldls = NULL; + unsigned long flags; int ret = 0; memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) - ret = VERR_DISCONN_LEN; - else if (rqst->desc_list_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) - ret = VERR_DISCONN_RQST_LEN; - else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) - ret = VERR_ASSOC_ID; - else if (rqst->associd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_assoc_id))) - ret = VERR_ASSOC_ID_LEN; - else if (rqst->discon_cmd.desc_tag != - cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) - ret = VERR_DISCONN_CMD; - else if (rqst->discon_cmd.desc_len != - fcnvme_lsdesc_len( - sizeof(struct fcnvme_lsdesc_disconn_cmd))) - ret = VERR_DISCONN_CMD_LEN; - /* - * As the standard changed on the LS, check if old format and scope - * something other than Association (e.g. 0). - */ - else if (rqst->discon_cmd.rsvd8[0]) - ret = VERR_DISCONN_SCOPE; - else { - /* match an active association */ + ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); + if (!ret) { + /* match an active association - takes an assoc ref if !NULL */ assoc = nvmet_fc_find_target_assoc(tgtport, be64_to_cpu(rqst->associd.association_id)); iod->assoc = assoc; @@ -1538,34 +1855,63 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, ret = VERR_NO_ASSOC; } - if (ret) { + if (ret || !assoc) { dev_err(tgtport->dev, "Disconnect LS failed: %s\n", validation_errors[ret]); - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, + sizeof(*acc), rqst->w0.ls_cmd, (ret == VERR_NO_ASSOC) ? FCNVME_RJT_RC_INV_ASSOC : - (ret == VERR_NO_CONN) ? - FCNVME_RJT_RC_INV_CONN : - FCNVME_RJT_RC_LOGIC, + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); - return; + return true; } /* format a response */ - iod->lsreq->rsplen = sizeof(*acc); + iod->lsrsp->rsplen = sizeof(*acc); - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( sizeof(struct fcnvme_ls_disconnect_assoc_acc)), FCNVME_LS_DISCONNECT_ASSOC); /* release get taken in nvmet_fc_find_target_assoc */ - nvmet_fc_tgt_a_put(iod->assoc); + nvmet_fc_tgt_a_put(assoc); + + /* + * The rules for LS response says the response cannot + * go back until ABTS's have been sent for all outstanding + * I/O and a Disconnect Association LS has been sent. + * So... save off the Disconnect LS to send the response + * later. If there was a prior LS already saved, replace + * it with the newer one and send a can't perform reject + * on the older one. + */ + spin_lock_irqsave(&tgtport->lock, flags); + oldls = assoc->rcv_disconn; + assoc->rcv_disconn = iod; + spin_unlock_irqrestore(&tgtport->lock, flags); + + nvmet_fc_delete_target_assoc(assoc); - nvmet_fc_delete_target_assoc(iod->assoc); + if (oldls) { + dev_info(tgtport->dev, + "{%d:%d} Multiple Disconnect Association LS's " + "received\n", + tgtport->fc_target_port.port_num, assoc->a_id); + /* overwrite good response with bogus failure */ + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, + sizeof(*iod->rspbuf), + /* ok to use rqst, LS is same */ + rqst->w0.ls_cmd, + FCNVME_RJT_RC_UNAB, + FCNVME_RJT_EXP_NONE, 0); + nvmet_fc_xmt_ls_rsp(tgtport, oldls); + } + + return false; } @@ -1577,13 +1923,13 @@ static void nvmet_fc_fcp_nvme_cmd_done(struct nvmet_req *nvme_req); static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; static void -nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) +nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) { - struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private; + struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private; struct nvmet_fc_tgtport *tgtport = iod->tgtport; fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); nvmet_fc_free_ls_iod(tgtport, iod); nvmet_fc_tgtport_put(tgtport); } @@ -1595,11 +1941,11 @@ nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, int ret; fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); + sizeof(*iod->rspbuf), DMA_TO_DEVICE); - ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq); + ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); if (ret) - nvmet_fc_xmt_ls_rsp_done(iod->lsreq); + nvmet_fc_xmt_ls_rsp_done(iod->lsrsp); } /* @@ -1609,15 +1955,15 @@ static void nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_rqst_w0 *w0 = - (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; + struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; + bool sendrsp = true; - iod->lsreq->nvmet_fc_private = iod; - iod->lsreq->rspbuf = iod->rspbuf; - iod->lsreq->rspdma = iod->rspdma; - iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done; + iod->lsrsp->nvme_fc_private = iod; + iod->lsrsp->rspbuf = iod->rspbuf; + iod->lsrsp->rspdma = iod->rspdma; + iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done; /* Be preventative. handlers will later set to valid length */ - iod->lsreq->rsplen = 0; + iod->lsrsp->rsplen = 0; iod->assoc = NULL; @@ -1637,15 +1983,16 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, break; case FCNVME_LS_DISCONNECT_ASSOC: /* Terminate a Queue/Connection or the Association */ - nvmet_fc_ls_disconnect(tgtport, iod); + sendrsp = nvmet_fc_ls_disconnect(tgtport, iod); break; default: - iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, - NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, + iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, + sizeof(*iod->rspbuf), w0->ls_cmd, FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); } - nvmet_fc_xmt_ls_rsp(tgtport, iod); + if (sendrsp) + nvmet_fc_xmt_ls_rsp(tgtport, iod); } /* @@ -1674,35 +2021,53 @@ nvmet_fc_handle_ls_rqst_work(struct work_struct *work) * * @target_port: pointer to the (registered) target port the LS was * received on. - * @lsreq: pointer to a lsreq request structure to be used to reference + * @lsrsp: pointer to a lsrsp structure to be used to reference * the exchange corresponding to the LS. * @lsreqbuf: pointer to the buffer containing the LS Request * @lsreqbuf_len: length, in bytes, of the received LS request */ int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, - struct nvmefc_tgt_ls_req *lsreq, + void *hosthandle, + struct nvmefc_ls_rsp *lsrsp, void *lsreqbuf, u32 lsreqbuf_len) { struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); struct nvmet_fc_ls_iod *iod; - - if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE) + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; + + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { + dev_info(tgtport->dev, + "RCV %s LS failed: payload too large (%d)\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : "", + lsreqbuf_len); return -E2BIG; + } - if (!nvmet_fc_tgtport_get(tgtport)) + if (!nvmet_fc_tgtport_get(tgtport)) { + dev_info(tgtport->dev, + "RCV %s LS failed: target deleting\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); return -ESHUTDOWN; + } iod = nvmet_fc_alloc_ls_iod(tgtport); if (!iod) { + dev_info(tgtport->dev, + "RCV %s LS failed: context allocation failed\n", + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? + nvmefc_ls_names[w0->ls_cmd] : ""); nvmet_fc_tgtport_put(tgtport); return -ENOENT; } - iod->lsreq = lsreq; + iod->lsrsp = lsrsp; iod->fcpreq = NULL; memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); iod->rqstdatalen = lsreqbuf_len; + iod->hosthandle = hosthandle; schedule_work(&iod->work); diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index f69ce66e2d44..2ff1d1334a03 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -208,10 +208,13 @@ struct fcloop_rport { }; struct fcloop_tport { - struct nvmet_fc_target_port *targetport; - struct nvme_fc_remote_port *remoteport; - struct fcloop_nport *nport; - struct fcloop_lport *lport; + struct nvmet_fc_target_port *targetport; + struct nvme_fc_remote_port *remoteport; + struct fcloop_nport *nport; + struct fcloop_lport *lport; + spinlock_t lock; + struct list_head ls_list; + struct work_struct ls_work; }; struct fcloop_nport { @@ -228,7 +231,8 @@ struct fcloop_nport { struct fcloop_lsreq { struct nvmefc_ls_req *lsreq; - struct nvmefc_tgt_ls_req tgt_ls_req; + struct nvmefc_ls_rsp ls_rsp; + int lsdir; /* H2T or T2H */ int status; struct list_head ls_list; /* fcloop_rport->ls_list */ }; @@ -267,9 +271,9 @@ struct fcloop_ini_fcpreq { }; static inline struct fcloop_lsreq * -tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) +ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) { - return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req); + return container_of(lsrsp, struct fcloop_lsreq, ls_rsp); } static inline struct fcloop_fcpreq * @@ -323,7 +327,7 @@ fcloop_rport_lsrqst_work(struct work_struct *work) } static int -fcloop_ls_req(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { @@ -344,27 +348,28 @@ fcloop_ls_req(struct nvme_fc_local_port *localport, } tls_req->status = 0; - ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, - lsreq->rqstaddr, lsreq->rqstlen); + ret = nvmet_fc_rcv_ls_req(rport->targetport, rport, + &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); return ret; } static int -fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, - struct nvmefc_tgt_ls_req *tgt_lsreq) +fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, + struct nvmefc_ls_rsp *lsrsp) { - struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); struct nvmefc_ls_req *lsreq = tls_req->lsreq; struct fcloop_tport *tport = targetport->private; struct nvme_fc_remote_port *remoteport = tport->remoteport; struct fcloop_rport *rport; - memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, - ((lsreq->rsplen < tgt_lsreq->rsplen) ? - lsreq->rsplen : tgt_lsreq->rsplen)); + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? + lsreq->rsplen : lsrsp->rsplen)); - tgt_lsreq->done(tgt_lsreq); + lsrsp->done(lsrsp); if (remoteport) { rport = remoteport->private; @@ -377,6 +382,99 @@ fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, return 0; } +static void +fcloop_tport_lsrqst_work(struct work_struct *work) +{ + struct fcloop_tport *tport = + container_of(work, struct fcloop_tport, ls_work); + struct fcloop_lsreq *tls_req; + + spin_lock(&tport->lock); + for (;;) { + tls_req = list_first_entry_or_null(&tport->ls_list, + struct fcloop_lsreq, ls_list); + if (!tls_req) + break; + + list_del(&tls_req->ls_list); + spin_unlock(&tport->lock); + + tls_req->lsreq->done(tls_req->lsreq, tls_req->status); + /* + * callee may free memory containing tls_req. + * do not reference lsreq after this. + */ + + spin_lock(&tport->lock); + } + spin_unlock(&tport->lock); +} + +static int +fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, + struct nvmefc_ls_req *lsreq) +{ + struct fcloop_lsreq *tls_req = lsreq->private; + struct fcloop_tport *tport = targetport->private; + int ret = 0; + + /* + * hosthandle should be the dst.rport value. + * hosthandle ignored as fcloop currently is + * 1:1 tgtport vs remoteport + */ + tls_req->lsreq = lsreq; + INIT_LIST_HEAD(&tls_req->ls_list); + + if (!tport->remoteport) { + tls_req->status = -ECONNREFUSED; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + return ret; + } + + tls_req->status = 0; + ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, + lsreq->rqstaddr, lsreq->rqstlen); + + return ret; +} + +static int +fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *lsrsp) +{ + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); + struct nvmefc_ls_req *lsreq = tls_req->lsreq; + struct fcloop_rport *rport = remoteport->private; + struct nvmet_fc_target_port *targetport = rport->targetport; + struct fcloop_tport *tport; + + memcpy(lsreq->rspaddr, lsrsp->rspbuf, + ((lsreq->rsplen < lsrsp->rsplen) ? + lsreq->rsplen : lsrsp->rsplen)); + lsrsp->done(lsrsp); + + if (targetport) { + tport = targetport->private; + spin_lock(&tport->lock); + list_add_tail(&tport->ls_list, &tls_req->ls_list); + spin_unlock(&tport->lock); + schedule_work(&tport->ls_work); + } + + return 0; +} + +static void +fcloop_t2h_host_release(void *hosthandle) +{ + /* host handle ignored for now */ +} + /* * Simulate reception of RSCN and converting it to a initiator transport * call to rescan a remote port. @@ -762,13 +860,19 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, } static void -fcloop_ls_abort(struct nvme_fc_local_port *localport, +fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, struct nvmefc_ls_req *lsreq) { } static void +fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport, + void *hosthandle, struct nvmefc_ls_req *lsreq) +{ +} + +static void fcloop_fcp_abort(struct nvme_fc_local_port *localport, struct nvme_fc_remote_port *remoteport, void *hw_queue_handle, @@ -867,6 +971,7 @@ fcloop_targetport_delete(struct nvmet_fc_target_port *targetport) { struct fcloop_tport *tport = targetport->private; + flush_work(&tport->ls_work); fcloop_nport_put(tport->nport); } @@ -879,10 +984,11 @@ static struct nvme_fc_port_template fctemplate = { .remoteport_delete = fcloop_remoteport_delete, .create_queue = fcloop_create_queue, .delete_queue = fcloop_delete_queue, - .ls_req = fcloop_ls_req, + .ls_req = fcloop_h2t_ls_req, .fcp_io = fcloop_fcp_req, - .ls_abort = fcloop_ls_abort, + .ls_abort = fcloop_h2t_ls_abort, .fcp_abort = fcloop_fcp_abort, + .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -896,11 +1002,14 @@ static struct nvme_fc_port_template fctemplate = { static struct nvmet_fc_target_template tgttemplate = { .targetport_delete = fcloop_targetport_delete, - .xmt_ls_rsp = fcloop_xmt_ls_rsp, + .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp, .fcp_op = fcloop_fcp_op, .fcp_abort = fcloop_tgt_fcp_abort, .fcp_req_release = fcloop_fcp_req_release, .discovery_event = fcloop_tgt_discovery_evt, + .ls_req = fcloop_t2h_ls_req, + .ls_abort = fcloop_t2h_ls_abort, + .host_release = fcloop_t2h_host_release, .max_hw_queues = FCLOOP_HW_QUEUES, .max_sgl_segments = FCLOOP_SGL_SEGS, .max_dif_sgl_segments = FCLOOP_SGL_SEGS, @@ -909,6 +1018,7 @@ static struct nvmet_fc_target_template tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), + .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), }; static ssize_t @@ -1258,6 +1368,9 @@ fcloop_create_target_port(struct device *dev, struct device_attribute *attr, tport->nport = nport; tport->lport = nport->lport; nport->tport = tport; + spin_lock_init(&tport->lock); + INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); + INIT_LIST_HEAD(&tport->ls_list); return count; } diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 26f50c23b82e..3dd6f566a240 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -47,6 +47,22 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id) id->nows = to0based(ql->io_opt / ql->logical_block_size); } +static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) +{ + struct blk_integrity *bi = bdev_get_integrity(ns->bdev); + + if (bi) { + ns->metadata_size = bi->tuple_size; + if (bi->profile == &t10_pi_type1_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE1; + else if (bi->profile == &t10_pi_type3_crc) + ns->pi_type = NVME_NS_DPS_PI_TYPE3; + else + /* Unsupported metadata type */ + ns->metadata_size = 0; + } +} + int nvmet_bdev_ns_enable(struct nvmet_ns *ns) { int ret; @@ -64,6 +80,12 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns) } ns->size = i_size_read(ns->bdev->bd_inode); ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); + + ns->pi_type = 0; + ns->metadata_size = 0; + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) + nvmet_bdev_ns_enable_integrity(ns); + return 0; } @@ -75,6 +97,11 @@ void nvmet_bdev_ns_disable(struct nvmet_ns *ns) } } +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) +{ + ns->size = i_size_read(ns->bdev->bd_inode); +} + static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) { u16 status = NVME_SC_SUCCESS; @@ -142,6 +169,61 @@ static void nvmet_bio_done(struct bio *bio) bio_put(bio); } +#ifdef CONFIG_BLK_DEV_INTEGRITY +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + struct blk_integrity *bi; + struct bio_integrity_payload *bip; + struct block_device *bdev = req->ns->bdev; + int rc; + size_t resid, len; + + bi = bdev_get_integrity(bdev); + if (unlikely(!bi)) { + pr_err("Unable to locate bio_integrity\n"); + return -ENODEV; + } + + bip = bio_integrity_alloc(bio, GFP_NOIO, + min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES)); + if (IS_ERR(bip)) { + pr_err("Unable to allocate bio_integrity_payload\n"); + return PTR_ERR(bip); + } + + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); + /* virtual start sector must be in integrity interval units */ + bip_set_seed(bip, bio->bi_iter.bi_sector >> + (bi->interval_exp - SECTOR_SHIFT)); + + resid = bip->bip_iter.bi_size; + while (resid > 0 && sg_miter_next(miter)) { + len = min_t(size_t, miter->length, resid); + rc = bio_integrity_add_page(bio, miter->page, len, + offset_in_page(miter->addr)); + if (unlikely(rc != len)) { + pr_err("bio_integrity_add_page() failed; %d\n", rc); + sg_miter_stop(miter); + return -ENOMEM; + } + + resid -= len; + if (len < miter->length) + miter->consumed -= miter->length - len; + } + sg_miter_stop(miter); + + return 0; +} +#else +static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, + struct sg_mapping_iter *miter) +{ + return -EINVAL; +} +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + static void nvmet_bdev_execute_rw(struct nvmet_req *req) { int sg_cnt = req->sg_cnt; @@ -149,9 +231,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) struct scatterlist *sg; struct blk_plug plug; sector_t sector; - int op, i; + int op, i, rc; + struct sg_mapping_iter prot_miter; + unsigned int iter_flags; + unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_transfer_len(req, total_len)) return; if (!req->sg_cnt) { @@ -163,8 +248,10 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) op |= REQ_FUA; + iter_flags = SG_MITER_TO_SG; } else { op = REQ_OP_READ; + iter_flags = SG_MITER_FROM_SG; } if (is_pci_p2pdma_page(sg_page(req->sg))) @@ -186,11 +273,24 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio->bi_opf = op; blk_start_plug(&plug); + if (req->metadata_len) + sg_miter_start(&prot_miter, req->metadata_sg, + req->metadata_sg_cnt, iter_flags); + for_each_sg(req->sg, sg, req->sg_cnt, i) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { struct bio *prev = bio; + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, + &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; @@ -204,6 +304,14 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) sg_cnt--; } + if (req->metadata_len) { + rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter); + if (unlikely(rc)) { + bio_io_error(bio); + return; + } + } + submit_bio(bio); blk_finish_plug(&plug); } @@ -212,7 +320,7 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); @@ -304,7 +412,7 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) sector_t nr_sector; int ret; - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; sector = le64_to_cpu(write_zeroes->slba) << @@ -331,6 +439,8 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_bdev_execute_rw; + if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) + req->metadata_len = nvmet_rw_metadata_len(req); return 0; case nvme_cmd_flush: req->execute = nvmet_bdev_execute_flush; diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index cd5670b83118..0abbefd9925e 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -13,6 +13,18 @@ #define NVMET_MAX_MPOOL_BVEC 16 #define NVMET_MIN_MPOOL_OBJ 16 +int nvmet_file_ns_revalidate(struct nvmet_ns *ns) +{ + struct kstat stat; + int ret; + + ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, + AT_STATX_FORCE_SYNC); + if (!ret) + ns->size = stat.size; + return ret; +} + void nvmet_file_ns_disable(struct nvmet_ns *ns) { if (ns->file) { @@ -30,7 +42,6 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { int flags = O_RDWR | O_LARGEFILE; - struct kstat stat; int ret; if (!ns->buffered_io) @@ -43,12 +54,10 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) return PTR_ERR(ns->file); } - ret = vfs_getattr(&ns->file->f_path, - &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); + ret = nvmet_file_ns_revalidate(ns); if (ret) goto err; - ns->size = stat.size; /* * i_blkbits can be greater than the universally accepted upper bound, * so make sure we export a sane namespace lba_shift. @@ -232,7 +241,7 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) return; if (!req->sg_cnt || !nr_bvec) { @@ -276,7 +285,7 @@ static void nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_execute_flush(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); schedule_work(&req->f.work); @@ -366,7 +375,7 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) { - if (!nvmet_check_data_len(req, 0)) + if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); schedule_work(&req->f.work); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 421dff3ea143..809691291e73 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -19,6 +19,7 @@ #include <linux/rcupdate.h> #include <linux/blkdev.h> #include <linux/radix-tree.h> +#include <linux/t10-pi.h> #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 @@ -77,6 +78,8 @@ struct nvmet_ns { int use_p2pmem; struct pci_dev *p2p_dev; + int pi_type; + int metadata_size; }; static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) @@ -142,6 +145,7 @@ struct nvmet_port { bool enabled; int inline_data_size; const struct nvmet_fabrics_ops *tr_ops; + bool pi_enable; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -201,6 +205,7 @@ struct nvmet_ctrl { spinlock_t error_lock; u64 err_counter; struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; + bool pi_support; }; struct nvmet_subsys_model { @@ -230,6 +235,7 @@ struct nvmet_subsys { u64 ver; u64 serial; char *subsysnqn; + bool pi_support; struct config_group group; @@ -281,6 +287,7 @@ struct nvmet_fabrics_ops { unsigned int type; unsigned int msdbd; bool has_keyed_sgls : 1; + bool metadata_support : 1; void (*queue_response)(struct nvmet_req *req); int (*add_port)(struct nvmet_port *port); void (*remove_port)(struct nvmet_port *port); @@ -302,6 +309,7 @@ struct nvmet_req { struct nvmet_cq *cq; struct nvmet_ns *ns; struct scatterlist *sg; + struct scatterlist *metadata_sg; struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; union { struct { @@ -315,8 +323,10 @@ struct nvmet_req { } f; }; int sg_cnt; + int metadata_sg_cnt; /* data length as parsed from the SGL descriptor: */ size_t transfer_len; + size_t metadata_len; struct nvmet_port *port; @@ -384,11 +394,11 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); -bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); +bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); -int nvmet_req_alloc_sgl(struct nvmet_req *req); -void nvmet_req_free_sgl(struct nvmet_req *req); +int nvmet_req_alloc_sgls(struct nvmet_req *req); +void nvmet_req_free_sgls(struct nvmet_req *req); void nvmet_execute_keep_alive(struct nvmet_req *req); @@ -498,13 +508,24 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns); u16 nvmet_bdev_flush(struct nvmet_req *req); u16 nvmet_file_flush(struct nvmet_req *req); void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); +void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); +int nvmet_file_ns_revalidate(struct nvmet_ns *ns); +void nvmet_ns_revalidate(struct nvmet_ns *ns); -static inline u32 nvmet_rw_len(struct nvmet_req *req) +static inline u32 nvmet_rw_data_len(struct nvmet_req *req) { return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << req->ns->blksize_shift; } +static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return 0; + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) * + req->ns->metadata_size; +} + static inline u32 nvmet_dsm_len(struct nvmet_req *req) { return (le32_to_cpu(req->cmd->dsm.nr) + 1) * @@ -519,4 +540,11 @@ static inline __le16 to0based(u32 a) return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); } +static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) +{ + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return false; + return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple); +} + #endif /* _NVMET_H */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index fd47de0e4e4e..d5141780592e 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -33,6 +33,9 @@ /* Assume mpsmin == device_page_size == 4KB */ #define NVMET_RDMA_MAX_MDTS 8 +#define NVMET_RDMA_MAX_METADATA_MDTS 5 + +struct nvmet_rdma_srq; struct nvmet_rdma_cmd { struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; @@ -41,6 +44,7 @@ struct nvmet_rdma_cmd { struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; struct nvme_command *nvme_cmd; struct nvmet_rdma_queue *queue; + struct nvmet_rdma_srq *nsrq; }; enum { @@ -57,6 +61,7 @@ struct nvmet_rdma_rsp { struct nvmet_rdma_queue *queue; struct ib_cqe read_cqe; + struct ib_cqe write_cqe; struct rdma_rw_ctx rw; struct nvmet_req req; @@ -83,6 +88,7 @@ struct nvmet_rdma_queue { struct ib_cq *cq; atomic_t sq_wr_avail; struct nvmet_rdma_device *dev; + struct nvmet_rdma_srq *nsrq; spinlock_t state_lock; enum nvmet_rdma_queue_state state; struct nvmet_cq nvme_cq; @@ -100,6 +106,7 @@ struct nvmet_rdma_queue { int idx; int host_qid; + int comp_vector; int recv_queue_size; int send_queue_size; @@ -113,11 +120,17 @@ struct nvmet_rdma_port { struct delayed_work repair_work; }; +struct nvmet_rdma_srq { + struct ib_srq *srq; + struct nvmet_rdma_cmd *cmds; + struct nvmet_rdma_device *ndev; +}; + struct nvmet_rdma_device { struct ib_device *device; struct ib_pd *pd; - struct ib_srq *srq; - struct nvmet_rdma_cmd *srq_cmds; + struct nvmet_rdma_srq **srqs; + int srq_count; size_t srq_size; struct kref ref; struct list_head entry; @@ -129,6 +142,16 @@ static bool nvmet_rdma_use_srq; module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); MODULE_PARM_DESC(use_srq, "Use shared receive queue."); +static int srq_size_set(const char *val, const struct kernel_param *kp); +static const struct kernel_param_ops srq_size_ops = { + .set = srq_size_set, + .get = param_get_int, +}; + +static int nvmet_rdma_srq_size = 1024; +module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644); +MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)"); + static DEFINE_IDA(nvmet_rdma_queue_ida); static LIST_HEAD(nvmet_rdma_queue_list); static DEFINE_MUTEX(nvmet_rdma_queue_mutex); @@ -140,6 +163,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp); static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, @@ -149,6 +173,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, static const struct nvmet_fabrics_ops nvmet_rdma_ops; +static int srq_size_set(const char *val, const struct kernel_param *kp) +{ + int n = 0, ret; + + ret = kstrtoint(val, 10, &n); + if (ret != 0 || n < 256) + return -EINVAL; + + return param_set_int(val, kp); +} + static int num_pages(int len) { return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); @@ -391,6 +426,9 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, /* Data In / RDMA READ */ r->read_cqe.done = nvmet_rdma_read_data_done; + /* Data Out / RDMA WRITE */ + r->write_cqe.done = nvmet_rdma_write_data_done; + return 0; out_free_rsp: @@ -466,8 +504,8 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, cmd->sge[0].addr, cmd->sge[0].length, DMA_FROM_DEVICE); - if (ndev->srq) - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); + if (cmd->nsrq) + ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL); else ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); @@ -500,6 +538,129 @@ static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) spin_unlock(&queue->rsp_wr_wait_lock); } +static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) +{ + struct ib_mr_status mr_status; + int ret; + u16 status = 0; + + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); + if (ret) { + pr_err("ib_check_mr_status failed, ret %d\n", ret); + return NVME_SC_INVALID_PI; + } + + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + status = NVME_SC_GUARD_CHECK; + break; + case IB_SIG_BAD_REFTAG: + status = NVME_SC_REFTAG_CHECK; + break; + case IB_SIG_BAD_APPTAG: + status = NVME_SC_APPTAG_CHECK; + break; + } + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", + mr_status.sig_err.err_type, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + } + + return status; +} + +static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, + struct nvme_command *cmd, struct ib_sig_domain *domain, + u16 control, u8 pi_type) +{ + domain->sig_type = IB_SIG_TYPE_T10_DIF; + domain->sig.dif.bg_type = IB_T10DIF_CRC; + domain->sig.dif.pi_interval = 1 << bi->interval_exp; + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); + if (control & NVME_RW_PRINFO_PRCHK_REF) + domain->sig.dif.ref_remap = true; + + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); + domain->sig.dif.app_escape = true; + if (pi_type == NVME_NS_DPS_PI_TYPE3) + domain->sig.dif.ref_escape = true; +} + +static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, + struct ib_sig_attrs *sig_attrs) +{ + struct nvme_command *cmd = req->cmd; + u16 control = le16_to_cpu(cmd->rw.control); + u8 pi_type = req->ns->pi_type; + struct blk_integrity *bi; + + bi = bdev_get_integrity(req->ns->bdev); + + memset(sig_attrs, 0, sizeof(*sig_attrs)); + + if (control & NVME_RW_PRINFO_PRACT) { + /* for WRITE_INSERT/READ_STRIP no wire domain */ + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + /* Clear the PRACT bit since HCA will generate/verify the PI */ + control &= ~NVME_RW_PRINFO_PRACT; + cmd->rw.control = cpu_to_le16(control); + /* PI is added by the HW */ + req->transfer_len += req->metadata_len; + } else { + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, + pi_type); + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, + pi_type); + } + + if (control & NVME_RW_PRINFO_PRCHK_REF) + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; + if (control & NVME_RW_PRINFO_PRCHK_GUARD) + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; + if (control & NVME_RW_PRINFO_PRCHK_APP) + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; +} + +static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, + struct ib_sig_attrs *sig_attrs) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + int ret; + + if (req->metadata_len) + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, sig_attrs, + addr, key, nvmet_data_dir(req)); + else + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, 0, addr, key, + nvmet_data_dir(req)); + + return ret; +} + +static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) +{ + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + struct nvmet_req *req = &rsp->req; + + if (req->metadata_len) + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, + cm_id->port_num, req->sg, req->sg_cnt, + req->metadata_sg, req->metadata_sg_cnt, + nvmet_data_dir(req)); + else + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, + req->sg, req->sg_cnt, nvmet_data_dir(req)); +} static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) { @@ -507,14 +668,11 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); - if (rsp->n_rdma) { - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); - } + if (rsp->n_rdma) + nvmet_rdma_rw_ctx_destroy(rsp); if (rsp->req.sg != rsp->cmd->inline_sg) - nvmet_req_free_sgl(&rsp->req); + nvmet_req_free_sgls(&rsp->req); if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) nvmet_rdma_process_wr_wait_list(queue); @@ -566,11 +724,16 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_wr.opcode = IB_WR_SEND; } - if (nvmet_rdma_need_data_out(rsp)) - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, - cm_id->port_num, NULL, &rsp->send_wr); - else + if (nvmet_rdma_need_data_out(rsp)) { + if (rsp->req.metadata_len) + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, &rsp->write_cqe, NULL); + else + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, + cm_id->port_num, NULL, &rsp->send_wr); + } else { first_wr = &rsp->send_wr; + } nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); @@ -589,15 +752,14 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) struct nvmet_rdma_rsp *rsp = container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); struct nvmet_rdma_queue *queue = cq->cq_context; + u16 status = 0; WARN_ON(rsp->n_rdma <= 0); atomic_add(rsp->n_rdma, &queue->sq_wr_avail); - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, - queue->cm_id->port_num, rsp->req.sg, - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); rsp->n_rdma = 0; if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); nvmet_req_uninit(&rsp->req); nvmet_rdma_release_rsp(rsp); if (wc->status != IB_WC_WR_FLUSH_ERR) { @@ -608,7 +770,58 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) return; } - rsp->req.execute(&rsp->req); + if (rsp->req.metadata_len) + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(status)) + nvmet_req_complete(&rsp->req, status); + else + rsp->req.execute(&rsp->req); +} + +static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) +{ + struct nvmet_rdma_rsp *rsp = + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); + struct nvmet_rdma_queue *queue = cq->cq_context; + struct rdma_cm_id *cm_id = rsp->queue->cm_id; + u16 status; + + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) + return; + + WARN_ON(rsp->n_rdma <= 0); + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); + rsp->n_rdma = 0; + + if (unlikely(wc->status != IB_WC_SUCCESS)) { + nvmet_rdma_rw_ctx_destroy(rsp); + nvmet_req_uninit(&rsp->req); + nvmet_rdma_release_rsp(rsp); + if (wc->status != IB_WC_WR_FLUSH_ERR) { + pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", + wc->wr_cqe, ib_wc_status_msg(wc->status), + wc->status); + nvmet_rdma_error_comp(queue); + } + return; + } + + /* + * Upon RDMA completion check the signature status + * - if succeeded send good NVMe response + * - if failed send bad NVMe response with appropriate error + */ + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); + if (unlikely(status)) + rsp->req.cqe->status = cpu_to_le16(status << 1); + nvmet_rdma_rw_ctx_destroy(rsp); + + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { + pr_err("sending cmd response failed\n"); + nvmet_rdma_release_rsp(rsp); + } } static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, @@ -665,9 +878,9 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, struct nvme_keyed_sgl_desc *sgl, bool invalidate) { - struct rdma_cm_id *cm_id = rsp->queue->cm_id; u64 addr = le64_to_cpu(sgl->addr); u32 key = get_unaligned_le32(sgl->key); + struct ib_sig_attrs sig_attrs; int ret; rsp->req.transfer_len = get_unaligned_le24(sgl->length); @@ -676,13 +889,14 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, if (!rsp->req.transfer_len) return 0; - ret = nvmet_req_alloc_sgl(&rsp->req); + if (rsp->req.metadata_len) + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); + + ret = nvmet_req_alloc_sgls(&rsp->req); if (unlikely(ret < 0)) goto error_out; - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, - nvmet_data_dir(&rsp->req)); + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); if (unlikely(ret < 0)) goto error_out; rsp->n_rdma += ret; @@ -845,23 +1059,40 @@ static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) nvmet_rdma_handle_command(queue, rsp); } -static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev) +static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq) { - if (!ndev->srq) + nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size, + false); + ib_destroy_srq(nsrq->srq); + + kfree(nsrq); +} + +static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev) +{ + int i; + + if (!ndev->srqs) return; - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); - ib_destroy_srq(ndev->srq); + for (i = 0; i < ndev->srq_count; i++) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + + kfree(ndev->srqs); } -static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) +static struct nvmet_rdma_srq * +nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) { struct ib_srq_init_attr srq_attr = { NULL, }; + size_t srq_size = ndev->srq_size; + struct nvmet_rdma_srq *nsrq; struct ib_srq *srq; - size_t srq_size; int ret, i; - srq_size = 4095; /* XXX: tune */ + nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL); + if (!nsrq) + return ERR_PTR(-ENOMEM); srq_attr.attr.max_wr = srq_size; srq_attr.attr.max_sge = 1 + ndev->inline_page_count; @@ -869,35 +1100,73 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) srq_attr.srq_type = IB_SRQT_BASIC; srq = ib_create_srq(ndev->pd, &srq_attr); if (IS_ERR(srq)) { - /* - * If SRQs aren't supported we just go ahead and use normal - * non-shared receive queues. - */ - pr_info("SRQ requested but not supported.\n"); - return 0; + ret = PTR_ERR(srq); + goto out_free; } - ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); - if (IS_ERR(ndev->srq_cmds)) { - ret = PTR_ERR(ndev->srq_cmds); + nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); + if (IS_ERR(nsrq->cmds)) { + ret = PTR_ERR(nsrq->cmds); goto out_destroy_srq; } - ndev->srq = srq; - ndev->srq_size = srq_size; + nsrq->srq = srq; + nsrq->ndev = ndev; for (i = 0; i < srq_size; i++) { - ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); + nsrq->cmds[i].nsrq = nsrq; + ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]); if (ret) goto out_free_cmds; } - return 0; + return nsrq; out_free_cmds: - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); + nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false); out_destroy_srq: ib_destroy_srq(srq); +out_free: + kfree(nsrq); + return ERR_PTR(ret); +} + +static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) +{ + int i, ret; + + if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { + /* + * If SRQs aren't supported we just go ahead and use normal + * non-shared receive queues. + */ + pr_info("SRQ requested but not supported.\n"); + return 0; + } + + ndev->srq_size = min(ndev->device->attrs.max_srq_wr, + nvmet_rdma_srq_size); + ndev->srq_count = min(ndev->device->num_comp_vectors, + ndev->device->attrs.max_srq); + + ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); + if (!ndev->srqs) + return -ENOMEM; + + for (i = 0; i < ndev->srq_count; i++) { + ndev->srqs[i] = nvmet_rdma_init_srq(ndev); + if (IS_ERR(ndev->srqs[i])) { + ret = PTR_ERR(ndev->srqs[i]); + goto err_srq; + } + } + + return 0; + +err_srq: + while (--i >= 0) + nvmet_rdma_destroy_srq(ndev->srqs[i]); + kfree(ndev->srqs); return ret; } @@ -910,7 +1179,7 @@ static void nvmet_rdma_free_dev(struct kref *ref) list_del(&ndev->entry); mutex_unlock(&device_list_mutex); - nvmet_rdma_destroy_srq(ndev); + nvmet_rdma_destroy_srqs(ndev); ib_dealloc_pd(ndev->pd); kfree(ndev); @@ -957,7 +1226,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) goto out_free_dev; if (nvmet_rdma_use_srq) { - ret = nvmet_rdma_init_srq(ndev); + ret = nvmet_rdma_init_srqs(ndev); if (ret) goto out_free_pd; } @@ -981,14 +1250,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) { struct ib_qp_init_attr qp_attr; struct nvmet_rdma_device *ndev = queue->dev; - int comp_vector, nr_cqe, ret, i, factor; - - /* - * Spread the io queues across completion vectors, - * but still keep all admin queues on vector 0. - */ - comp_vector = !queue->host_qid ? 0 : - queue->idx % ndev->device->num_comp_vectors; + int nr_cqe, ret, i, factor; /* * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. @@ -996,7 +1258,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; queue->cq = ib_alloc_cq(ndev->device, queue, - nr_cqe + 1, comp_vector, + nr_cqe + 1, queue->comp_vector, IB_POLL_WORKQUEUE); if (IS_ERR(queue->cq)) { ret = PTR_ERR(queue->cq); @@ -1020,14 +1282,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, ndev->device->attrs.max_send_sge); - if (ndev->srq) { - qp_attr.srq = ndev->srq; + if (queue->nsrq) { + qp_attr.srq = queue->nsrq->srq; } else { /* +1 for drain */ qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } + if (queue->port->pi_enable && queue->host_qid) + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; + ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); if (ret) { pr_err("failed to create_qp ret= %d\n", ret); @@ -1041,7 +1306,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, qp_attr.cap.max_send_wr, queue->cm_id); - if (!ndev->srq) { + if (!queue->nsrq) { for (i = 0; i < queue->recv_queue_size; i++) { queue->cmds[i].queue = queue; ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); @@ -1076,7 +1341,7 @@ static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue) nvmet_sq_destroy(&queue->nvme_sq); nvmet_rdma_destroy_queue_ib(queue); - if (!queue->dev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1146,6 +1411,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { + struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_queue *queue; int ret; @@ -1172,6 +1438,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); queue->dev = ndev; queue->cm_id = cm_id; + queue->port = port->nport; spin_lock_init(&queue->state_lock); queue->state = NVMET_RDMA_Q_CONNECTING; @@ -1188,13 +1455,23 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, goto out_destroy_sq; } + /* + * Spread the io queues across completion vectors, + * but still keep all admin queues on vector 0. + */ + queue->comp_vector = !queue->host_qid ? 0 : + queue->idx % ndev->device->num_comp_vectors; + + ret = nvmet_rdma_alloc_rsps(queue); if (ret) { ret = NVME_RDMA_CM_NO_RSC; goto out_ida_remove; } - if (!ndev->srq) { + if (ndev->srqs) { + queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; + } else { queue->cmds = nvmet_rdma_alloc_cmds(ndev, queue->recv_queue_size, !queue->host_qid); @@ -1215,7 +1492,7 @@ nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev, return queue; out_free_cmds: - if (!ndev->srq) { + if (!queue->nsrq) { nvmet_rdma_free_cmds(queue->dev, queue->cmds, queue->recv_queue_size, !queue->host_qid); @@ -1241,6 +1518,10 @@ static void nvmet_rdma_qp_event(struct ib_event *event, void *priv) case IB_EVENT_COMM_EST: rdma_notify(queue->cm_id, event->event); break; + case IB_EVENT_QP_LAST_WQE_REACHED: + pr_debug("received last WQE reached event for queue=0x%p\n", + queue); + break; default: pr_err("received IB QP event: %s (%d)\n", ib_event_msg(event->event), event->event); @@ -1275,7 +1556,6 @@ static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id, static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event) { - struct nvmet_rdma_port *port = cm_id->context; struct nvmet_rdma_device *ndev; struct nvmet_rdma_queue *queue; int ret = -EINVAL; @@ -1291,7 +1571,6 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, ret = -ENOMEM; goto put_device; } - queue->port = port->nport; if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ @@ -1563,6 +1842,14 @@ static int nvmet_rdma_enable_port(struct nvmet_rdma_port *port) goto out_destroy_id; } + if (port->nport->pi_enable && + !(cm_id->device->attrs.device_cap_flags & + IB_DEVICE_INTEGRITY_HANDOVER)) { + pr_err("T10-PI is not supported for %pISpcs\n", addr); + ret = -EINVAL; + goto out_destroy_id; + } + port->cm_id = cm_id; return 0; @@ -1672,6 +1959,8 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) { + if (ctrl->pi_support) + return NVMET_RDMA_MAX_METADATA_MDTS; return NVMET_RDMA_MAX_MDTS; } @@ -1680,6 +1969,7 @@ static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .type = NVMF_TRTYPE_RDMA, .msdbd = 1, .has_keyed_sgls = 1, + .metadata_support = 1, .add_port = nvmet_rdma_add_port, .remove_port = nvmet_rdma_remove_port, .queue_response = nvmet_rdma_queue_response, diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index f0da04e960f4..6f557db0320d 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -325,6 +325,14 @@ static void nvmet_tcp_fatal_error(struct nvmet_tcp_queue *queue) kernel_sock_shutdown(queue->sock, SHUT_RDWR); } +static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) +{ + if (status == -EPIPE || status == -ECONNRESET) + kernel_sock_shutdown(queue->sock, SHUT_RDWR); + else + nvmet_tcp_fatal_error(queue); +} + static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) { struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; @@ -510,7 +518,7 @@ static int nvmet_try_send_data_pdu(struct nvmet_tcp_cmd *cmd) ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), offset_in_page(cmd->data_pdu) + cmd->offset, - left, MSG_DONTWAIT | MSG_MORE); + left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); if (ret <= 0) return ret; @@ -538,7 +546,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if ((!last_in_batch && cmd->queue->send_list_len) || cmd->wbytes_done + left < cmd->req.transfer_len || queue->data_digest || !queue->nvme_sq.sqhd_disabled) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, left, flags); @@ -585,7 +593,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd, int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; @@ -614,7 +622,7 @@ static int nvmet_try_send_r2t(struct nvmet_tcp_cmd *cmd, bool last_in_batch) int ret; if (!last_in_batch && cmd->queue->send_list_len) - flags |= MSG_MORE; + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; else flags |= MSG_EOR; @@ -644,6 +652,8 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd, bool last_in_batch) if (!last_in_batch && cmd->queue->send_list_len) msg.msg_flags |= MSG_MORE; + else + msg.msg_flags |= MSG_EOR; ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); if (unlikely(ret <= 0)) @@ -716,11 +726,15 @@ static int nvmet_tcp_try_send(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_send_one(queue, i == budget - 1); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*sends)++; } - +done: return ret; } @@ -1157,11 +1171,15 @@ static int nvmet_tcp_try_recv(struct nvmet_tcp_queue *queue, for (i = 0; i < budget; i++) { ret = nvmet_tcp_try_recv_one(queue); - if (ret <= 0) + if (unlikely(ret < 0)) { + nvmet_tcp_socket_error(queue, ret); + goto done; + } else if (ret == 0) { break; + } (*recvs)++; } - +done: return ret; } @@ -1186,27 +1204,16 @@ static void nvmet_tcp_io_work(struct work_struct *w) pending = false; ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); - if (ret > 0) { + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); - if (ret > 0) { - /* transmitted message/data */ + if (ret > 0) pending = true; - } else if (ret < 0) { - if (ret == -EPIPE || ret == -ECONNRESET) - kernel_sock_shutdown(queue->sock, SHUT_RDWR); - else - nvmet_tcp_fatal_error(queue); + else if (ret < 0) return; - } } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); diff --git a/drivers/nvme/target/trace.h b/drivers/nvme/target/trace.h index e645caa882dd..0458046d6501 100644 --- a/drivers/nvme/target/trace.h +++ b/drivers/nvme/target/trace.h @@ -130,6 +130,34 @@ TRACE_EVENT(nvmet_req_complete, ); +#define aer_name(aer) { aer, #aer } + +TRACE_EVENT(nvmet_async_event, + TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result), + TP_ARGS(ctrl, result), + TP_STRUCT__entry( + __field(int, ctrl_id) + __field(u32, result) + ), + TP_fast_assign( + __entry->ctrl_id = ctrl->cntlid; + __entry->result = (le32_to_cpu(result) & 0xff00) >> 8; + ), + TP_printk("nvmet%d: NVME_AEN=%#08x [%s]", + __entry->ctrl_id, __entry->result, + __print_symbolic(__entry->result, + aer_name(NVME_AER_NOTICE_NS_CHANGED), + aer_name(NVME_AER_NOTICE_ANA), + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_NOTICE_DISC_CHANGED), + aer_name(NVME_AER_ERROR), + aer_name(NVME_AER_SMART), + aer_name(NVME_AER_CSS), + aer_name(NVME_AER_VS)) + ) +); +#undef aer_name + #endif /* _TRACE_NVMET_H */ #undef TRACE_INCLUDE_PATH diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 9a5f3add325f..777734d1b4e5 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -22,6 +22,7 @@ #include <asm/schid.h> #include <asm/cmb.h> #include <linux/uaccess.h> +#include <linux/dasd_mod.h> /* This is ugly... */ #define PRINTK_HEADER "dasd_ioctl:" @@ -457,10 +458,9 @@ static int dasd_ioctl_read_profile(struct dasd_block *block, void __user *argp) /* * Return dasd information. Used for BIODASDINFO and BIODASDINFO2. */ -static int dasd_ioctl_information(struct dasd_block *block, - unsigned int cmd, void __user *argp) +static int __dasd_ioctl_information(struct dasd_block *block, + struct dasd_information2_t *dasd_info) { - struct dasd_information2_t *dasd_info; struct subchannel_id sch_id; struct ccw_dev_id dev_id; struct dasd_device *base; @@ -473,15 +473,9 @@ static int dasd_ioctl_information(struct dasd_block *block, if (!base->discipline || !base->discipline->fill_info) return -EINVAL; - dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL); - if (dasd_info == NULL) - return -ENOMEM; - rc = base->discipline->fill_info(base, dasd_info); - if (rc) { - kfree(dasd_info); + if (rc) return rc; - } cdev = base->cdev; ccw_device_get_id(cdev, &dev_id); @@ -520,15 +514,24 @@ static int dasd_ioctl_information(struct dasd_block *block, list_for_each(l, &base->ccw_queue) dasd_info->chanq_len++; spin_unlock_irqrestore(&block->queue_lock, flags); + return 0; +} - rc = 0; - if (copy_to_user(argp, dasd_info, - ((cmd == (unsigned int) BIODASDINFO2) ? - sizeof(struct dasd_information2_t) : - sizeof(struct dasd_information_t)))) - rc = -EFAULT; +static int dasd_ioctl_information(struct dasd_block *block, void __user *argp, + size_t copy_size) +{ + struct dasd_information2_t *dasd_info; + int error; + + dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL); + if (!dasd_info) + return -ENOMEM; + + error = __dasd_ioctl_information(block, dasd_info); + if (!error && copy_to_user(argp, dasd_info, copy_size)) + error = -EFAULT; kfree(dasd_info); - return rc; + return error; } /* @@ -622,10 +625,12 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, rc = dasd_ioctl_check_format(bdev, argp); break; case BIODASDINFO: - rc = dasd_ioctl_information(block, cmd, argp); + rc = dasd_ioctl_information(block, argp, + sizeof(struct dasd_information_t)); break; case BIODASDINFO2: - rc = dasd_ioctl_information(block, cmd, argp); + rc = dasd_ioctl_information(block, argp, + sizeof(struct dasd_information2_t)); break; case BIODASDPRRD: rc = dasd_ioctl_read_profile(block, argp); @@ -660,3 +665,36 @@ int dasd_ioctl(struct block_device *bdev, fmode_t mode, dasd_put_device(base); return rc; } + + +/** + * dasd_biodasdinfo() - fill out the dasd information structure + * @disk [in]: pointer to gendisk structure that references a DASD + * @info [out]: pointer to the dasd_information2_t structure + * + * Provide access to DASD specific information. + * The gendisk structure is checked if it belongs to the DASD driver by + * comparing the gendisk->fops pointer. + * If it does not belong to the DASD driver -EINVAL is returned. + * Otherwise the provided dasd_information2_t structure is filled out. + * + * Returns: + * %0 on success and a negative error value on failure. + */ +int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info) +{ + struct dasd_device *base; + int error; + + if (disk->fops != &dasd_device_operations) + return -EINVAL; + + base = dasd_device_from_gendisk(disk); + if (!base) + return -ENODEV; + error = __dasd_ioctl_information(base->block, info); + dasd_put_device(base); + return error; +} +/* export that symbol_get in partition detection is possible */ +EXPORT_SYMBOL_GPL(dasd_biodasdinfo); diff --git a/drivers/scsi/lpfc/lpfc.h b/drivers/scsi/lpfc/lpfc.h index 8e2a356911a9..62e96d4fdcc6 100644 --- a/drivers/scsi/lpfc/lpfc.h +++ b/drivers/scsi/lpfc/lpfc.h @@ -143,7 +143,7 @@ struct lpfc_dmabuf { struct lpfc_nvmet_ctxbuf { struct list_head list; - struct lpfc_nvmet_rcv_ctx *context; + struct lpfc_async_xchg_ctx *context; struct lpfc_iocbq *iocbq; struct lpfc_sglq *sglq; struct work_struct defer_work; diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 1354c141d614..f089867674cb 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -37,8 +37,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -48,7 +46,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_version.h" #include "lpfc_compat.h" diff --git a/drivers/scsi/lpfc/lpfc_crtn.h b/drivers/scsi/lpfc/lpfc_crtn.h index 76dc8d9493d2..9ee6b930a655 100644 --- a/drivers/scsi/lpfc/lpfc_crtn.h +++ b/drivers/scsi/lpfc/lpfc_crtn.h @@ -24,7 +24,6 @@ typedef int (*node_filter)(struct lpfc_nodelist *, void *); struct fc_rport; struct fc_frame_header; -struct lpfc_nvmet_rcv_ctx; void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); void lpfc_sli_read_link_ste(struct lpfc_hba *); void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); @@ -564,12 +563,16 @@ void lpfc_nvme_update_localport(struct lpfc_vport *vport); int lpfc_nvmet_create_targetport(struct lpfc_hba *phba); int lpfc_nvmet_update_targetport(struct lpfc_hba *phba); void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); -void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, - struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); +int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg); +int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg); void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, struct rqb_dmabuf *nvmebuf, uint64_t isr_ts, uint8_t cqflag); void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); +void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, + struct lpfc_nodelist *ndlp); void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb, struct lpfc_wcqe_complete *abts_cmpl); diff --git a/drivers/scsi/lpfc/lpfc_ct.c b/drivers/scsi/lpfc/lpfc_ct.c index 2aa578d20f8c..196f6ae9952e 100644 --- a/drivers/scsi/lpfc/lpfc_ct.c +++ b/drivers/scsi/lpfc/lpfc_ct.c @@ -44,7 +44,6 @@ #include "lpfc_disc.h" #include "lpfc.h" #include "lpfc_scsi.h" -#include "lpfc_nvme.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_version.h" diff --git a/drivers/scsi/lpfc/lpfc_debugfs.c b/drivers/scsi/lpfc/lpfc_debugfs.c index 5a754fb5f854..4daae90e0c99 100644 --- a/drivers/scsi/lpfc/lpfc_debugfs.c +++ b/drivers/scsi/lpfc/lpfc_debugfs.c @@ -39,8 +39,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -50,7 +48,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -1035,7 +1032,7 @@ lpfc_debugfs_nvmestat_data(struct lpfc_vport *vport, char *buf, int size) { struct lpfc_hba *phba = vport->phba; struct lpfc_nvmet_tgtport *tgtp; - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct nvme_fc_local_port *localport; struct lpfc_fc4_ctrl_stat *cstat; struct lpfc_nvme_lport *lport; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index 789eecbf32eb..f5952f8cd4b5 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -36,8 +36,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_nl.h" @@ -825,6 +823,12 @@ lpfc_cleanup_rpis(struct lpfc_vport *vport, int remove) if ((phba->sli_rev < LPFC_SLI_REV4) && (!remove && ndlp->nlp_type & NLP_FABRIC)) continue; + + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + lpfc_disc_state_machine(vport, ndlp, NULL, remove ? NLP_EVT_DEVICE_RM diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index 4104bdcdbb6f..ea99483345f2 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -50,8 +50,6 @@ #include <scsi/scsi_tcq.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -61,7 +59,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" @@ -1032,7 +1029,7 @@ static int lpfc_hba_down_post_s4(struct lpfc_hba *phba) { struct lpfc_io_buf *psb, *psb_next; - struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; + struct lpfc_async_xchg_ctx *ctxp, *ctxp_next; struct lpfc_sli4_hdw_queue *qp; LIST_HEAD(aborts); LIST_HEAD(nvme_aborts); @@ -1099,7 +1096,7 @@ lpfc_hba_down_post_s4(struct lpfc_hba *phba) &nvmet_aborts); spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { - ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); + ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP); lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); } } diff --git a/drivers/scsi/lpfc/lpfc_mem.c b/drivers/scsi/lpfc/lpfc_mem.c index 7082279e4c01..726f6619230f 100644 --- a/drivers/scsi/lpfc/lpfc_mem.c +++ b/drivers/scsi/lpfc/lpfc_mem.c @@ -31,8 +31,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -41,8 +39,6 @@ #include "lpfc_disc.h" #include "lpfc.h" #include "lpfc_scsi.h" -#include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index a024e5a3918f..d8501bd959e7 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -32,8 +32,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -491,6 +489,11 @@ lpfc_rcv_plogi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, (unsigned long long) wwn_to_u64(sp->portName.u.wwn)); + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + ndlp->nlp_prev_state = ndlp->nlp_state; /* rport needs to be unregistered first */ lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); @@ -841,6 +844,12 @@ lpfc_rcv_logo(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL); else lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); + + /* Notify transport of connectivity loss to trigger cleanup. */ + if (phba->nvmet_support && + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) + lpfc_nvmet_invalidate_host(phba, ndlp); + if (ndlp->nlp_DID == Fabric_DID) { if (vport->port_state <= LPFC_FDISC) goto out; diff --git a/drivers/scsi/lpfc/lpfc_nvme.c b/drivers/scsi/lpfc/lpfc_nvme.c index a45936e08031..b46ba70f78da 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.c +++ b/drivers/scsi/lpfc/lpfc_nvme.c @@ -36,9 +36,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme.h> -#include <linux/nvme-fc-driver.h> -#include <linux/nvme-fc.h> #include "lpfc_version.h" #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -396,43 +393,100 @@ lpfc_nvme_remoteport_delete(struct nvme_fc_remote_port *remoteport) return; } -static void -lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, - struct lpfc_wcqe_complete *wcqe) +/** + * lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request + * @phba: pointer to lpfc hba data structure. + * @axchg: pointer to exchange context for the NVME LS request + * + * This routine is used for processing an asychronously received NVME LS + * request. Any remaining validation is done and the LS is then forwarded + * to the nvme-fc transport via nvme_fc_rcv_ls_req(). + * + * The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing) + * -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done. + * __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. + * + * Returns 0 if LS was handled and delivered to the transport + * Returns 1 if LS failed to be handled and should be dropped + */ +int +lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg) { - struct lpfc_vport *vport = cmdwqe->vport; +#if (IS_ENABLED(CONFIG_NVME_FC)) + struct lpfc_vport *vport; + struct lpfc_nvme_rport *lpfc_rport; + struct nvme_fc_remote_port *remoteport; struct lpfc_nvme_lport *lport; - uint32_t status; + uint32_t *payload = axchg->payload; + int rc; + + vport = axchg->ndlp->vport; + lpfc_rport = axchg->ndlp->nrport; + if (!lpfc_rport) + return -EINVAL; + + remoteport = lpfc_rport->remoteport; + if (!vport->localport) + return -EINVAL; + + lport = vport->localport->private; + if (!lport) + return -EINVAL; + + rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload, + axchg->size); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x " + "%08x %08x %08x\n", + axchg->size, rc, + *payload, *(payload+1), *(payload+2), + *(payload+3), *(payload+4), *(payload+5)); + + if (!rc) + return 0; +#endif + return 1; +} + +/** + * __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME + * LS request. + * @phba: Pointer to HBA context object + * @vport: The local port that issued the LS + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * This function is the generic completion handler for NVME LS requests. + * The function updates any states and statistics, calls the transport + * ls_req done() routine, then tears down the command and buffers used + * for the LS request. + **/ +void +__lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ struct nvmefc_ls_req *pnvme_lsreq; struct lpfc_dmabuf *buf_ptr; struct lpfc_nodelist *ndlp; + uint32_t status; pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; + ndlp = (struct lpfc_nodelist *)cmdwqe->context1; status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; - if (vport->localport) { - lport = (struct lpfc_nvme_lport *)vport->localport->private; - if (lport) { - atomic_inc(&lport->fc4NvmeLsCmpls); - if (status) { - if (bf_get(lpfc_wcqe_c_xb, wcqe)) - atomic_inc(&lport->cmpl_ls_xb); - atomic_inc(&lport->cmpl_ls_err); - } - } - } - - ndlp = (struct lpfc_nodelist *)cmdwqe->context1; lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6047 nvme cmpl Enter " - "Data %px DID %x Xri: %x status %x reason x%x " - "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n", + "6047 NVMEx LS REQ %px cmpl DID %x Xri: %x " + "status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px " + "ndlp:x%px\n", pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, cmdwqe->sli4_xritag, status, (wcqe->parameter & 0xffff), cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); - lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n", + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n", cmdwqe->sli4_xritag, status, wcqe->parameter); if (cmdwqe->context3) { @@ -445,7 +499,7 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, pnvme_lsreq->done(pnvme_lsreq, status); else lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6046 nvme cmpl without done call back? " + "6046 NVMEx cmpl without done call back? " "Data %px DID %x Xri: %x status %x\n", pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, cmdwqe->sli4_xritag, status); @@ -456,6 +510,31 @@ lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_sli_release_iocbq(phba, cmdwqe); } +static void +lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_vport *vport = cmdwqe->vport; + struct lpfc_nvme_lport *lport; + uint32_t status; + + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + + if (vport->localport) { + lport = (struct lpfc_nvme_lport *)vport->localport->private; + if (lport) { + atomic_inc(&lport->fc4NvmeLsCmpls); + if (status) { + if (bf_get(lpfc_wcqe_c_xb, wcqe)) + atomic_inc(&lport->cmpl_ls_xb); + atomic_inc(&lport->cmpl_ls_err); + } + } + } + + __lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe); +} + static int lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, struct lpfc_dmabuf *inp, @@ -557,13 +636,6 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, /* Issue GEN REQ WQE for NPORT <did> */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, - "6050 Issue GEN REQ WQE to NPORT x%x " - "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px " - "xmit:%d 1st:%d\n", - ndlp->nlp_DID, genwqe->iotag, - vport->port_state, - genwqe, pnvme_lsreq, bmp, xmit_len, first_len); genwqe->wqe_cmpl = cmpl; genwqe->iocb_cmpl = NULL; genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; @@ -575,105 +647,108 @@ lpfc_nvme_gen_req(struct lpfc_vport *vport, struct lpfc_dmabuf *bmp, rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe); if (rc) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS, "6045 Issue GEN REQ WQE to NPORT x%x " - "Data: x%x x%x\n", + "Data: x%x x%x rc x%x\n", ndlp->nlp_DID, genwqe->iotag, - vport->port_state); + vport->port_state, rc); lpfc_sli_release_iocbq(phba, genwqe); return 1; } + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS, + "6050 Issue GEN REQ WQE to NPORT x%x " + "Data: oxid: x%x state: x%x wq:x%px lsreq:x%px " + "bmp:x%px xmit:%d 1st:%d\n", + ndlp->nlp_DID, genwqe->sli4_xritag, + vport->port_state, + genwqe, pnvme_lsreq, bmp, xmit_len, first_len); return 0; } + /** - * lpfc_nvme_ls_req - Issue an Link Service request - * @lpfc_pnvme: Pointer to the driver's nvme instance data - * @lpfc_nvme_lport: Pointer to the driver's local port data - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request + * @vport: The local port issuing the LS + * @ndlp: The remote port to send the LS to + * @pnvme_lsreq: Pointer to LS request structure from the transport * - * Driver registers this routine to handle any link service request - * from the nvme_fc transport to a remote nvme-aware port. + * Routine validates the ndlp, builds buffers and sends a GEN_REQUEST + * WQE to perform the LS operation. * * Return value : * 0 - Success - * TODO: What are the failure codes. + * non-zero: various error codes, in form of -Exxx **/ -static int -lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, - struct nvme_fc_remote_port *pnvme_rport, - struct nvmefc_ls_req *pnvme_lsreq) +int +__lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*gen_req_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)) { - int ret = 0; - struct lpfc_nvme_lport *lport; - struct lpfc_nvme_rport *rport; - struct lpfc_vport *vport; - struct lpfc_nodelist *ndlp; - struct ulp_bde64 *bpl; struct lpfc_dmabuf *bmp; + struct ulp_bde64 *bpl; + int ret; uint16_t ntype, nstate; - /* there are two dma buf in the request, actually there is one and - * the second one is just the start address + cmd size. - * Before calling lpfc_nvme_gen_req these buffers need to be wrapped - * in a lpfc_dmabuf struct. When freeing we just free the wrapper - * because the nvem layer owns the data bufs. - * We do not have to break these packets open, we don't care what is in - * them. And we do not have to look at the resonse data, we only care - * that we got a response. All of the caring is going to happen in the - * nvme-fc layer. - */ - - lport = (struct lpfc_nvme_lport *)pnvme_lport->private; - rport = (struct lpfc_nvme_rport *)pnvme_rport->private; - if (unlikely(!lport) || unlikely(!rport)) - return -EINVAL; - - vport = lport->vport; - - if (vport->load_flag & FC_UNLOADING) - return -ENODEV; - - /* Need the ndlp. It is stored in the driver's rport. */ - ndlp = rport->ndlp; if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6051 Remoteport x%px, rport has invalid ndlp. " - "Failing LS Req\n", pnvme_rport); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, + "6051 NVMEx LS REQ: Bad NDLP x%px, Failing " + "LS Req\n", + ndlp); return -ENODEV; } - /* The remote node has to be a mapped nvme target or an - * unmapped nvme initiator or it's an error. - */ ntype = ndlp->nlp_type; nstate = ndlp->nlp_state; if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, - "6088 DID x%06x not ready for " - "IO. State x%x, Type x%x\n", - pnvme_rport->port_id, - ndlp->nlp_state, ndlp->nlp_type); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, + "6088 NVMEx LS REQ: Fail DID x%06x not " + "ready for IO. Type x%x, State x%x\n", + ndlp->nlp_DID, ntype, nstate); return -ENODEV; } - bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); + + /* + * there are two dma buf in the request, actually there is one and + * the second one is just the start address + cmd size. + * Before calling lpfc_nvme_gen_req these buffers need to be wrapped + * in a lpfc_dmabuf struct. When freeing we just free the wrapper + * because the nvem layer owns the data bufs. + * We do not have to break these packets open, we don't care what is + * in them. And we do not have to look at the resonse data, we only + * care that we got a response. All of the caring is going to happen + * in the nvme-fc layer. + */ + + bmp = kmalloc(sizeof(*bmp), GFP_KERNEL); if (!bmp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6044 Could not find node for DID %x\n", - pnvme_rport->port_id); - return 2; + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6044 NVMEx LS REQ: Could not alloc LS buf " + "for DID %x\n", + ndlp->nlp_DID); + return -ENOMEM; } - INIT_LIST_HEAD(&bmp->list); + bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys)); if (!bmp->virt) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6042 Could not find node for DID %x\n", - pnvme_rport->port_id); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6042 NVMEx LS REQ: Could not alloc mbuf " + "for DID %x\n", + ndlp->nlp_DID); kfree(bmp); - return 3; + return -ENOMEM; } + + INIT_LIST_HEAD(&bmp->list); + bpl = (struct ulp_bde64 *)bmp->virt; bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma)); bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma)); @@ -688,118 +763,206 @@ lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, bpl->tus.f.bdeSize = pnvme_lsreq->rsplen; bpl->tus.w = le32_to_cpu(bpl->tus.w); - /* Expand print to include key fields. */ lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, - "6149 Issue LS Req to DID 0x%06x lport x%px, " - "rport x%px lsreq x%px rqstlen:%d rsplen:%d " - "%pad %pad\n", - ndlp->nlp_DID, pnvme_lport, pnvme_rport, - pnvme_lsreq, pnvme_lsreq->rqstlen, - pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, - &pnvme_lsreq->rspdma); - - atomic_inc(&lport->fc4NvmeLsRequests); + "6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, " + "rqstlen:%d rsplen:%d %pad %pad\n", + ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen, + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, + &pnvme_lsreq->rspdma); - /* Hardcode the wait to 30 seconds. Connections are failing otherwise. - * This code allows it all to work. - */ ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr, - pnvme_lsreq, lpfc_nvme_cmpl_gen_req, - ndlp, 2, 30, 0); + pnvme_lsreq, gen_req_cmp, ndlp, 2, + LPFC_NVME_LS_TIMEOUT, 0); if (ret != WQE_SUCCESS) { - atomic_inc(&lport->xmt_ls_err); - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, - "6052 EXIT. issue ls wqe failed lport x%px, " - "rport x%px lsreq x%px Status %x DID %x\n", - pnvme_lport, pnvme_rport, pnvme_lsreq, - ret, ndlp->nlp_DID); + lpfc_printf_vlog(vport, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR, + "6052 NVMEx REQ: EXIT. issue ls wqe failed " + "lsreq x%px Status %x DID %x\n", + pnvme_lsreq, ret, ndlp->nlp_DID); lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys); kfree(bmp); - return ret; + return -EIO; } - /* Stub in routine and return 0 for now. */ - return ret; + return 0; } /** - * lpfc_nvme_ls_abort - Issue an Link Service request - * @lpfc_pnvme: Pointer to the driver's nvme instance data - * @lpfc_nvme_lport: Pointer to the driver's local port data - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq + * lpfc_nvme_ls_req - Issue an NVME Link Service request + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS * * Driver registers this routine to handle any link service request * from the nvme_fc transport to a remote nvme-aware port. * * Return value : * 0 - Success - * TODO: What are the failure codes. + * non-zero: various error codes, in form of -Exxx **/ -static void -lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, - struct nvme_fc_remote_port *pnvme_rport, - struct nvmefc_ls_req *pnvme_lsreq) +static int +lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) { struct lpfc_nvme_lport *lport; + struct lpfc_nvme_rport *rport; struct lpfc_vport *vport; - struct lpfc_hba *phba; - struct lpfc_nodelist *ndlp; - LIST_HEAD(abort_list); - struct lpfc_sli_ring *pring; - struct lpfc_iocbq *wqe, *next_wqe; + int ret; lport = (struct lpfc_nvme_lport *)pnvme_lport->private; - if (unlikely(!lport)) - return; - vport = lport->vport; - phba = vport->phba; + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; + if (unlikely(!lport) || unlikely(!rport)) + return -EINVAL; + vport = lport->vport; if (vport->load_flag & FC_UNLOADING) - return; + return -ENODEV; + + atomic_inc(&lport->fc4NvmeLsRequests); + + ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq, + lpfc_nvme_ls_req_cmp); + if (ret) + atomic_inc(&lport->xmt_ls_err); + + return ret; +} + +/** + * __lpfc_nvme_ls_abort - Generic service routine to abort a prior + * NVME LS request + * @vport: The local port that issued the LS + * @ndlp: The remote port the LS was sent to + * @pnvme_lsreq: Pointer to LS request structure from the transport + * + * The driver validates the ndlp, looks for the LS, and aborts the + * LS if found. + * + * Returns: + * 0 : if LS found and aborted + * non-zero: various error conditions in form -Exxx + **/ +int +__lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_hba *phba = vport->phba; + struct lpfc_sli_ring *pring; + struct lpfc_iocbq *wqe, *next_wqe; + bool foundit = false; - ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); if (!ndlp) { - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, - "6049 Could not find node for DID %x\n", - pnvme_rport->port_id); - return; + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NODE | + LOG_NVME_IOERR | LOG_NVME_ABTS, + "6049 NVMEx LS REQ Abort: Bad NDLP x%px DID " + "x%06x, Failing LS Req\n", + ndlp, ndlp ? ndlp->nlp_DID : 0); + return -EINVAL; } - /* Expand print to include key fields. */ - lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, - "6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d " - "rsplen:%d %pad %pad\n", - pnvme_lport, pnvme_rport, + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, + "6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq " + "x%p rqstlen:%d rsplen:%d %pad %pad\n", pnvme_lsreq, pnvme_lsreq->rqstlen, pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, &pnvme_lsreq->rspdma); /* - * Lock the ELS ring txcmplq and build a local list of all ELS IOs - * that need an ABTS. The IOs need to stay on the txcmplq so that - * the abort operation completes them successfully. + * Lock the ELS ring txcmplq and look for the wqe that matches + * this ELS. If found, issue an abort on the wqe. */ pring = phba->sli4_hba.nvmels_wq->pring; spin_lock_irq(&phba->hbalock); spin_lock(&pring->ring_lock); list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { - /* Add to abort_list on on NDLP match. */ - if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) { + if (wqe->context2 == pnvme_lsreq) { wqe->iocb_flag |= LPFC_DRIVER_ABORTED; - list_add_tail(&wqe->dlist, &abort_list); + foundit = true; + break; } } spin_unlock(&pring->ring_lock); - spin_unlock_irq(&phba->hbalock); - /* Abort the targeted IOs and remove them from the abort list. */ - list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) { - atomic_inc(&lport->xmt_ls_abort); - spin_lock_irq(&phba->hbalock); - list_del_init(&wqe->dlist); + if (foundit) lpfc_sli_issue_abort_iotag(phba, pring, wqe); - spin_unlock_irq(&phba->hbalock); + spin_unlock_irq(&phba->hbalock); + + if (foundit) + return 0; + + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, + "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n", + pnvme_lsreq); + return -EINVAL; +} + +static int +lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport, + struct nvme_fc_remote_port *remoteport, + struct nvmefc_ls_rsp *ls_rsp) +{ + struct lpfc_async_xchg_ctx *axchg = + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); + struct lpfc_nvme_lport *lport; + int rc; + + if (axchg->phba->pport->load_flag & FC_UNLOADING) + return -ENODEV; + + lport = (struct lpfc_nvme_lport *)localport->private; + + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp); + + if (rc) { + /* + * unless the failure is due to having already sent + * the response, an abort will be generated for the + * exchange if the rsp can't be sent. + */ + if (rc != -EALREADY) + atomic_inc(&lport->xmt_ls_abort); + return rc; } + + return 0; +} + +/** + * lpfc_nvme_ls_abort - Abort a prior NVME LS request + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS + * + * Driver registers this routine to abort a NVME LS request that is + * in progress (from the transports perspective). + **/ +static void +lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, + struct nvme_fc_remote_port *pnvme_rport, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvme_lport *lport; + struct lpfc_vport *vport; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; + if (unlikely(!lport)) + return; + vport = lport->vport; + phba = vport->phba; + + if (vport->load_flag & FC_UNLOADING) + return; + + ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); + + ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq); + if (!ret) + atomic_inc(&lport->xmt_ls_abort); } /* Fix up the existing sgls for NVME IO. */ @@ -1911,6 +2074,7 @@ static struct nvme_fc_port_template lpfc_nvme_template = { .fcp_io = lpfc_nvme_fcp_io_submit, .ls_abort = lpfc_nvme_ls_abort, .fcp_abort = lpfc_nvme_fcp_abort, + .xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVME_DEFAULT_SEGS, @@ -2106,6 +2270,7 @@ lpfc_nvme_create_localport(struct lpfc_vport *vport) atomic_set(&lport->cmpl_fcp_err, 0); atomic_set(&lport->cmpl_ls_xb, 0); atomic_set(&lport->cmpl_ls_err, 0); + atomic_set(&lport->fc4NvmeLsRequests, 0); atomic_set(&lport->fc4NvmeLsCmpls, 0); } diff --git a/drivers/scsi/lpfc/lpfc_nvme.h b/drivers/scsi/lpfc/lpfc_nvme.h index 593c48ff634e..4a4c3f780e1f 100644 --- a/drivers/scsi/lpfc/lpfc_nvme.h +++ b/drivers/scsi/lpfc/lpfc_nvme.h @@ -21,6 +21,10 @@ * included with this package. * ********************************************************************/ +#include <linux/nvme.h> +#include <linux/nvme-fc-driver.h> +#include <linux/nvme-fc.h> + #define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ #define LPFC_NVME_ERSP_LEN 0x20 @@ -74,3 +78,179 @@ struct lpfc_nvme_rport { struct lpfc_nvme_fcpreq_priv { struct lpfc_io_buf *nvme_buf; }; + +/* + * set NVME LS request timeouts to 30s. It is larger than the 2*R_A_TOV + * set by the spec, which appears to have issues with some devices. + */ +#define LPFC_NVME_LS_TIMEOUT 30 + + +#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ +#define LPFC_NVMET_RQE_MIN_POST 128 +#define LPFC_NVMET_RQE_DEF_POST 512 +#define LPFC_NVMET_RQE_DEF_COUNT 2048 +#define LPFC_NVMET_SUCCESS_LEN 12 + +#define LPFC_NVMET_MRQ_AUTO 0 +#define LPFC_NVMET_MRQ_MAX 16 + +#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) + +/* Used for NVME Target */ +#define LPFC_NVMET_INV_HOST_ACTIVE 1 + +struct lpfc_nvmet_tgtport { + struct lpfc_hba *phba; + struct completion *tport_unreg_cmp; + atomic_t state; /* tracks nvmet hosthandle invalidation */ + + /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ + atomic_t rcv_ls_req_in; + atomic_t rcv_ls_req_out; + atomic_t rcv_ls_req_drop; + atomic_t xmt_ls_abort; + atomic_t xmt_ls_abort_cmpl; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ + atomic_t xmt_ls_rsp; + atomic_t xmt_ls_drop; + + /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ + atomic_t xmt_ls_rsp_error; + atomic_t xmt_ls_rsp_aborted; + atomic_t xmt_ls_rsp_xb_set; + atomic_t xmt_ls_rsp_cmpl; + + /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ + atomic_t rcv_fcp_cmd_in; + atomic_t rcv_fcp_cmd_out; + atomic_t rcv_fcp_cmd_drop; + atomic_t rcv_fcp_cmd_defer; + atomic_t xmt_fcp_release; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op */ + atomic_t xmt_fcp_drop; + atomic_t xmt_fcp_read_rsp; + atomic_t xmt_fcp_read; + atomic_t xmt_fcp_write; + atomic_t xmt_fcp_rsp; + + /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ + atomic_t xmt_fcp_rsp_xb_set; + atomic_t xmt_fcp_rsp_cmpl; + atomic_t xmt_fcp_rsp_error; + atomic_t xmt_fcp_rsp_aborted; + atomic_t xmt_fcp_rsp_drop; + + /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ + atomic_t xmt_fcp_xri_abort_cqe; + atomic_t xmt_fcp_abort; + atomic_t xmt_fcp_abort_cmpl; + atomic_t xmt_abort_sol; + atomic_t xmt_abort_unsol; + atomic_t xmt_abort_rsp; + atomic_t xmt_abort_rsp_error; + + /* Stats counters - defer IO */ + atomic_t defer_ctx; + atomic_t defer_fod; + atomic_t defer_wqfull; +}; + +struct lpfc_nvmet_ctx_info { + struct list_head nvmet_ctx_list; + spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ + struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; + struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; + uint16_t nvmet_ctx_list_cnt; + char pad[16]; /* pad to a cache-line */ +}; + +/* This retrieves the context info associated with the specified cpu / mrq */ +#define lpfc_get_ctx_list(phba, cpu, mrq) \ + (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) + +/* Values for state field of struct lpfc_async_xchg_ctx */ +#define LPFC_NVME_STE_LS_RCV 1 +#define LPFC_NVME_STE_LS_ABORT 2 +#define LPFC_NVME_STE_LS_RSP 3 +#define LPFC_NVME_STE_RCV 4 +#define LPFC_NVME_STE_DATA 5 +#define LPFC_NVME_STE_ABORT 6 +#define LPFC_NVME_STE_DONE 7 +#define LPFC_NVME_STE_FREE 0xff + +/* Values for flag field of struct lpfc_async_xchg_ctx */ +#define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */ +#define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */ +#define LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */ +#define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */ +#define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on exchange */ +#define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ +#define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ +#define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */ + +struct lpfc_async_xchg_ctx { + union { + struct nvmefc_tgt_fcp_req fcp_req; + } hdlrctx; + struct list_head list; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + struct nvmefc_ls_req *ls_req; + struct nvmefc_ls_rsp ls_rsp; + struct lpfc_iocbq *wqeq; + struct lpfc_iocbq *abort_wqeq; + spinlock_t ctxlock; /* protect flag access */ + uint32_t sid; + uint32_t offset; + uint16_t oxid; + uint16_t size; + uint16_t entry_cnt; + uint16_t cpu; + uint16_t idx; + uint16_t state; + uint16_t flag; + void *payload; + struct rqb_dmabuf *rqb_buffer; + struct lpfc_nvmet_ctxbuf *ctxbuf; + struct lpfc_sli4_hdw_queue *hdwq; + +#ifdef CONFIG_SCSI_LPFC_DEBUG_FS + uint64_t ts_isr_cmd; + uint64_t ts_cmd_nvme; + uint64_t ts_nvme_data; + uint64_t ts_data_wqput; + uint64_t ts_isr_data; + uint64_t ts_data_nvme; + uint64_t ts_nvme_status; + uint64_t ts_status_wqput; + uint64_t ts_isr_status; + uint64_t ts_status_nvme; +#endif +}; + + +/* routines found in lpfc_nvme.c */ +int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, + struct nvmefc_ls_req *pnvme_lsreq, + void (*gen_req_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)); +void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); +int __lpfc_nvme_ls_abort(struct lpfc_vport *vport, + struct lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq); + +/* routines found in lpfc_nvmet.c */ +int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, + uint16_t xri); +int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, + struct nvmefc_ls_rsp *ls_rsp, + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)); +void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 565419bf8d74..32eb5e873e9b 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -36,10 +36,6 @@ #include <scsi/scsi_transport_fc.h> #include <scsi/fc/fc_fs.h> -#include <linux/nvme.h> -#include <linux/nvme-fc-driver.h> -#include <linux/nvme-fc.h> - #include "lpfc_version.h" #include "lpfc_hw4.h" #include "lpfc_hw.h" @@ -50,29 +46,25 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_logmsg.h" #include "lpfc_crtn.h" #include "lpfc_vport.h" #include "lpfc_debugfs.h" static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, dma_addr_t rspbuf, uint16_t rspsize); static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *); + struct lpfc_async_xchg_ctx *); static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, + struct lpfc_async_xchg_ctx *, uint32_t, uint16_t); -static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, - struct lpfc_nvmet_rcv_ctx *, - uint32_t, uint16_t); static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *, - struct lpfc_nvmet_rcv_ctx *); + struct lpfc_async_xchg_ctx *); static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *); static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf); @@ -221,10 +213,10 @@ lpfc_nvmet_cmd_template(void) } #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) -static struct lpfc_nvmet_rcv_ctx * +static struct lpfc_async_xchg_ctx * lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflag; bool found = false; @@ -243,10 +235,10 @@ lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) return NULL; } -static struct lpfc_nvmet_rcv_ctx * +static struct lpfc_async_xchg_ctx * lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflag; bool found = false; @@ -267,7 +259,8 @@ lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) #endif static void -lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) +lpfc_nvmet_defer_release(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *ctxp) { lockdep_assert_held(&ctxp->ctxlock); @@ -275,10 +268,10 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) "6313 NVMET Defer ctx release oxid x%x flg x%x\n", ctxp->oxid, ctxp->flag); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) + if (ctxp->flag & LPFC_NVME_CTX_RLS) return; - ctxp->flag |= LPFC_NVMET_CTX_RLS; + ctxp->flag |= LPFC_NVME_CTX_RLS; spin_lock(&phba->sli4_hba.t_active_list_lock); list_del(&ctxp->list); spin_unlock(&phba->sli4_hba.t_active_list_lock); @@ -288,6 +281,53 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) } /** + * __lpfc_nvme_xmt_ls_rsp_cmp - Generic completion handler for the + * transmission of an NVME LS response. + * @phba: Pointer to HBA context object. + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * The function is called from SLI ring event handler with no + * lock held. The function frees memory resources used for the command + * used to send the NVME LS RSP. + **/ +void +__lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2; + struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp; + uint32_t status, result; + + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + result = wcqe->parameter; + + if (axchg->state != LPFC_NVME_STE_LS_RSP || axchg->entry_cnt != 2) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6410 NVMEx LS cmpl state mismatch IO x%x: " + "%d %d\n", + axchg->oxid, axchg->state, axchg->entry_cnt); + } + + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x result x%x\n", + axchg->oxid, status, result); + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6038 NVMEx LS rsp cmpl: %d %d oxid x%x\n", + status, result, axchg->oxid); + + lpfc_nlp_put(cmdwqe->context1); + cmdwqe->context2 = NULL; + cmdwqe->context3 = NULL; + lpfc_sli_release_iocbq(phba, cmdwqe); + ls_rsp->done(ls_rsp); + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, + "6200 NVMEx LS rsp cmpl done status %d oxid x%x\n", + status, axchg->oxid); + kfree(axchg); +} + +/** * lpfc_nvmet_xmt_ls_rsp_cmp - Completion handler for LS Response * @phba: Pointer to HBA context object. * @cmdwqe: Pointer to driver command WQE object. @@ -295,33 +335,23 @@ lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) * * The function is called from SLI ring event handler with no * lock held. This function is the completion handler for NVME LS commands - * The function frees memory resources used for the NVME commands. + * The function updates any states and statistics, then calls the + * generic completion handler to free resources. **/ static void lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { struct lpfc_nvmet_tgtport *tgtp; - struct nvmefc_tgt_ls_req *rsp; - struct lpfc_nvmet_rcv_ctx *ctxp; uint32_t status, result; - status = bf_get(lpfc_wcqe_c_status, wcqe); - result = wcqe->parameter; - ctxp = cmdwqe->context2; - - if (ctxp->state != LPFC_NVMET_STE_LS_RSP || ctxp->entry_cnt != 2) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6410 NVMET LS cmpl state mismatch IO x%x: " - "%d %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); - } - if (!phba->targetport) - goto out; + goto finish; - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; + result = wcqe->parameter; + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; if (tgtp) { if (status) { atomic_inc(&tgtp->xmt_ls_rsp_error); @@ -334,22 +364,8 @@ lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, } } -out: - rsp = &ctxp->ctx.ls_req; - - lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", - ctxp->oxid, status, result); - - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, - "6038 NVMET LS rsp cmpl: %d %d oxid x%x\n", - status, result, ctxp->oxid); - - lpfc_nlp_put(cmdwqe->context1); - cmdwqe->context2 = NULL; - cmdwqe->context3 = NULL; - lpfc_sli_release_iocbq(phba, cmdwqe); - rsp->done(rsp); - kfree(ctxp); +finish: + __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe); } /** @@ -369,7 +385,7 @@ void lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct rqb_dmabuf *nvmebuf; @@ -378,7 +394,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) int cpu; unsigned long iflag; - if (ctxp->state == LPFC_NVMET_STE_FREE) { + if (ctxp->state == LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6411 NVMET free, already free IO x%x: %d %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); @@ -390,8 +406,8 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) /* check if freed in another path whilst acquiring lock */ if (nvmebuf) { ctxp->rqb_buffer = NULL; - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { - ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ; + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { + ctxp->flag &= ~LPFC_NVME_CTX_REUSE_WQ; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); nvmebuf->hrq->rqbp->rqb_free_buffer(phba, nvmebuf); @@ -404,7 +420,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } } - ctxp->state = LPFC_NVMET_STE_FREE; + ctxp->state = LPFC_NVME_STE_FREE; spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); if (phba->sli4_hba.nvmet_io_wait_cnt) { @@ -421,14 +437,14 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) size = nvmebuf->bytes_recv; sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context; ctxp->wqeq = NULL; ctxp->offset = 0; ctxp->phba = phba; ctxp->size = size; ctxp->oxid = oxid; ctxp->sid = sid; - ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->state = LPFC_NVME_STE_RCV; ctxp->entry_cnt = 1; ctxp->flag = 0; ctxp->ctxbuf = ctx_buf; @@ -453,7 +469,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) /* Indicate that a replacement buffer has been posted */ spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ; + ctxp->flag |= LPFC_NVME_CTX_REUSE_WQ; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); if (!queue_work(phba->wq, &ctx_buf->defer_work)) { @@ -495,7 +511,7 @@ lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) #ifdef CONFIG_SCSI_LPFC_DEBUG_FS static void lpfc_nvmet_ktime(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { uint64_t seg1, seg2, seg3, seg4, seg5; uint64_t seg6, seg7, seg8, seg9, seg10; @@ -704,16 +720,16 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, { struct lpfc_nvmet_tgtport *tgtp; struct nvmefc_tgt_fcp_req *rsp; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; uint32_t status, result, op, start_clean, logerr; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS int id; #endif ctxp = cmdwqe->context2; - ctxp->flag &= ~LPFC_NVMET_IO_INP; + ctxp->flag &= ~LPFC_NVME_IO_INP; - rsp = &ctxp->ctx.fcp_req; + rsp = &ctxp->hdlrctx.fcp_req; op = rsp->op; status = bf_get(lpfc_wcqe_c_status, wcqe); @@ -740,13 +756,13 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* pick up SLI4 exhange busy condition */ if (bf_get(lpfc_wcqe_c_xb, wcqe)) { - ctxp->flag |= LPFC_NVMET_XBUSY; + ctxp->flag |= LPFC_NVME_XBUSY; logerr |= LOG_NVME_ABTS; if (tgtp) atomic_inc(&tgtp->xmt_fcp_rsp_xb_set); } else { - ctxp->flag &= ~LPFC_NVMET_XBUSY; + ctxp->flag &= ~LPFC_NVME_XBUSY; } lpfc_printf_log(phba, KERN_INFO, logerr, @@ -768,7 +784,7 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, if ((op == NVMET_FCOP_READDATA_RSP) || (op == NVMET_FCOP_RSP)) { /* Sanity check */ - ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->state = LPFC_NVME_STE_DONE; ctxp->entry_cnt++; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -826,17 +842,32 @@ lpfc_nvmet_xmt_fcp_op_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, #endif } -static int -lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_ls_req *rsp) +/** + * __lpfc_nvme_xmt_ls_rsp - Generic service routine to issue transmit + * an NVME LS rsp for a prior NVME LS request that was received. + * @axchg: pointer to exchange context for the NVME LS request the response + * is for. + * @ls_rsp: pointer to the transport LS RSP that is to be sent + * @xmt_ls_rsp_cmp: completion routine to call upon RSP transmit done + * + * This routine is used to format and send a WQE to transmit a NVME LS + * Response. The response is for a prior NVME LS request that was + * received and posted to the transport. + * + * Returns: + * 0 : if response successfully transmit + * non-zero : if response failed to transmit, of the form -Exxx. + **/ +int +__lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, + struct nvmefc_ls_rsp *ls_rsp, + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, + struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe)) { - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req); - struct lpfc_hba *phba = ctxp->phba; - struct hbq_dmabuf *nvmebuf = - (struct hbq_dmabuf *)ctxp->rqb_buffer; + struct lpfc_hba *phba = axchg->phba; + struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer; struct lpfc_iocbq *nvmewqeq; - struct lpfc_nvmet_tgtport *nvmep = tgtport->private; struct lpfc_dmabuf dmabuf; struct ulp_bde64 bpl; int rc; @@ -844,34 +875,28 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, if (phba->pport->load_flag & FC_UNLOADING) return -ENODEV; - if (phba->pport->load_flag & FC_UNLOADING) - return -ENODEV; - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, - "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid); + "6023 NVMEx LS rsp oxid x%x\n", axchg->oxid); - if ((ctxp->state != LPFC_NVMET_STE_LS_RCV) || - (ctxp->entry_cnt != 1)) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6412 NVMET LS rsp state mismatch " + if (axchg->state != LPFC_NVME_STE_LS_RCV || axchg->entry_cnt != 1) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6412 NVMEx LS rsp state mismatch " "oxid x%x: %d %d\n", - ctxp->oxid, ctxp->state, ctxp->entry_cnt); + axchg->oxid, axchg->state, axchg->entry_cnt); + return -EALREADY; } - ctxp->state = LPFC_NVMET_STE_LS_RSP; - ctxp->entry_cnt++; + axchg->state = LPFC_NVME_STE_LS_RSP; + axchg->entry_cnt++; - nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, - rsp->rsplen); + nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, axchg, ls_rsp->rspdma, + ls_rsp->rsplen); if (nvmewqeq == NULL) { - atomic_inc(&nvmep->xmt_ls_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6150 LS Drop IO x%x: Prep\n", - ctxp->oxid); - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, - ctxp->sid, ctxp->oxid); - return -ENOMEM; + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, + "6150 NVMEx LS Drop Rsp x%x: Prep\n", + axchg->oxid); + rc = -ENOMEM; + goto out_free_buf; } /* Save numBdes for bpl2sgl */ @@ -881,39 +906,106 @@ lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, dmabuf.virt = &bpl; bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow; bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh; - bpl.tus.f.bdeSize = rsp->rsplen; + bpl.tus.f.bdeSize = ls_rsp->rsplen; bpl.tus.f.bdeFlags = 0; bpl.tus.w = le32_to_cpu(bpl.tus.w); + /* + * Note: although we're using stack space for the dmabuf, the + * call to lpfc_sli4_issue_wqe is synchronous, so it will not + * be referenced after it returns back to this routine. + */ - nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp; + nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp; nvmewqeq->iocb_cmpl = NULL; - nvmewqeq->context2 = ctxp; + nvmewqeq->context2 = axchg; - lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n", - ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen); + lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n", + axchg->oxid, nvmewqeq->hba_wqidx, ls_rsp->rsplen); + + rc = lpfc_sli4_issue_wqe(phba, axchg->hdwq, nvmewqeq); + + /* clear to be sure there's no reference */ + nvmewqeq->context3 = NULL; - rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); if (rc == WQE_SUCCESS) { /* * Okay to repost buffer here, but wait till cmpl * before freeing ctxp and iocbq. */ lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_rsp); return 0; } - /* Give back resources */ - atomic_inc(&nvmep->xmt_ls_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6151 LS Drop IO x%x: Issue %d\n", - ctxp->oxid, rc); + + lpfc_printf_log(phba, KERN_ERR, + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, + "6151 NVMEx LS RSP x%x: failed to transmit %d\n", + axchg->oxid, rc); + + rc = -ENXIO; lpfc_nlp_put(nvmewqeq->context1); +out_free_buf: + /* Give back resources */ lpfc_in_buf_free(phba, &nvmebuf->dbuf); - atomic_inc(&nvmep->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); - return -ENXIO; + + /* + * As transport doesn't track completions of responses, if the rsp + * fails to send, the transport will effectively ignore the rsp + * and consider the LS done. However, the driver has an active + * exchange open for the LS - so be sure to abort the exchange + * if the response isn't sent. + */ + lpfc_nvme_unsol_ls_issue_abort(phba, axchg, axchg->sid, axchg->oxid); + return rc; +} + +/** + * lpfc_nvmet_xmt_ls_rsp - Transmit NVME LS response + * @tgtport: pointer to target port that NVME LS is to be transmit from. + * @ls_rsp: pointer to the transport LS RSP that is to be sent + * + * Driver registers this routine to transmit responses for received NVME + * LS requests. + * + * This routine is used to format and send a WQE to transmit a NVME LS + * Response. The ls_rsp is used to reverse-map the LS to the original + * NVME LS request sequence, which provides addressing information for + * the remote port the LS to be sent to, as well as the exchange id + * that is the LS is bound to. + * + * Returns: + * 0 : if response successfully transmit + * non-zero : if response failed to transmit, of the form -Exxx. + **/ +static int +lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, + struct nvmefc_ls_rsp *ls_rsp) +{ + struct lpfc_async_xchg_ctx *axchg = + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); + struct lpfc_nvmet_tgtport *nvmep = tgtport->private; + int rc; + + if (axchg->phba->pport->load_flag & FC_UNLOADING) + return -ENODEV; + + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp); + + if (rc) { + atomic_inc(&nvmep->xmt_ls_drop); + /* + * unless the failure is due to having already sent + * the response, an abort will be generated for the + * exchange if the rsp can't be sent. + */ + if (rc != -EALREADY) + atomic_inc(&nvmep->xmt_ls_abort); + return rc; + } + + atomic_inc(&nvmep->xmt_ls_rsp); + return 0; } static int @@ -921,8 +1013,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_queue *wq; struct lpfc_iocbq *nvmewqeq; @@ -968,8 +1060,8 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, #endif /* Sanity check */ - if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || - (ctxp->state == LPFC_NVMET_STE_ABORT)) { + if ((ctxp->flag & LPFC_NVME_ABTS_RCV) || + (ctxp->state == LPFC_NVME_STE_ABORT)) { atomic_inc(&lpfc_nvmep->xmt_fcp_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6102 IO oxid x%x aborted\n", @@ -997,7 +1089,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", ctxp->oxid, rsp->op, rsp->rsplen); - ctxp->flag |= LPFC_NVMET_IO_INP; + ctxp->flag |= LPFC_NVME_IO_INP; rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); if (rc == WQE_SUCCESS) { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS @@ -1016,7 +1108,7 @@ lpfc_nvmet_xmt_fcp_op(struct nvmet_fc_target_port *tgtport, * WQ was full, so queue nvmewqeq to be sent after * WQE release CQE */ - ctxp->flag |= LPFC_NVMET_DEFER_WQFULL; + ctxp->flag |= LPFC_NVME_DEFER_WQFULL; wq = ctxp->hdwq->io_wq; pring = wq->pring; spin_lock_irqsave(&pring->ring_lock, iflags); @@ -1056,8 +1148,8 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *req) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(req, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; struct lpfc_queue *wq; unsigned long flags; @@ -1085,13 +1177,13 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, /* Since iaab/iaar are NOT set, we need to check * if the firmware is in process of aborting IO */ - if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) { + if (ctxp->flag & (LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP)) { spin_unlock_irqrestore(&ctxp->ctxlock, flags); return; } - ctxp->flag |= LPFC_NVMET_ABORT_OP; + ctxp->flag |= LPFC_NVME_ABORT_OP; - if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) { + if (ctxp->flag & LPFC_NVME_DEFER_WQFULL) { spin_unlock_irqrestore(&ctxp->ctxlock, flags); lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); @@ -1101,11 +1193,11 @@ lpfc_nvmet_xmt_fcp_abort(struct nvmet_fc_target_port *tgtport, } spin_unlock_irqrestore(&ctxp->ctxlock, flags); - /* An state of LPFC_NVMET_STE_RCV means we have just received + /* A state of LPFC_NVME_STE_RCV means we have just received * the NVME command and have not started processing it. * (by issuing any IO WQEs on this exchange yet) */ - if (ctxp->state == LPFC_NVMET_STE_RCV) + if (ctxp->state == LPFC_NVME_STE_RCV) lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); else @@ -1118,26 +1210,26 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct lpfc_hba *phba = ctxp->phba; unsigned long flags; bool aborting = false; spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_XBUSY) + if (ctxp->flag & LPFC_NVME_XBUSY) lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, "6027 NVMET release with XBUSY flag x%x" " oxid x%x\n", ctxp->flag, ctxp->oxid); - else if (ctxp->state != LPFC_NVMET_STE_DONE && - ctxp->state != LPFC_NVMET_STE_ABORT) + else if (ctxp->state != LPFC_NVME_STE_DONE && + ctxp->state != LPFC_NVME_STE_ABORT) lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6413 NVMET release bad state %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); - if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || - (ctxp->flag & LPFC_NVMET_XBUSY)) { + if ((ctxp->flag & LPFC_NVME_ABORT_OP) || + (ctxp->flag & LPFC_NVME_XBUSY)) { aborting = true; /* let the abort path do the real release */ lpfc_nvmet_defer_release(phba, ctxp); @@ -1148,7 +1240,7 @@ lpfc_nvmet_xmt_fcp_release(struct nvmet_fc_target_port *tgtport, ctxp->state, aborting); atomic_inc(&lpfc_nvmep->xmt_fcp_release); - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; + ctxp->flag &= ~LPFC_NVME_TNOTIFY; if (aborting) return; @@ -1161,8 +1253,8 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *rsp) { struct lpfc_nvmet_tgtport *tgtp; - struct lpfc_nvmet_rcv_ctx *ctxp = - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); + struct lpfc_async_xchg_ctx *ctxp = + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; struct lpfc_hba *phba = ctxp->phba; unsigned long iflag; @@ -1190,6 +1282,116 @@ lpfc_nvmet_defer_rcv(struct nvmet_fc_target_port *tgtport, spin_unlock_irqrestore(&ctxp->ctxlock, iflag); } +/** + * lpfc_nvmet_ls_req_cmp - completion handler for a nvme ls request + * @phba: Pointer to HBA context object + * @cmdwqe: Pointer to driver command WQE object. + * @wcqe: Pointer to driver response CQE object. + * + * This function is the completion handler for NVME LS requests. + * The function updates any states and statistics, then calls the + * generic completion handler to finish completion of the request. + **/ +static void +lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, + struct lpfc_wcqe_complete *wcqe) +{ + __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe); +} + +/** + * lpfc_nvmet_ls_req - Issue an Link Service request + * @targetport - pointer to target instance registered with nvmet transport. + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. + * Driver sets this value to the ndlp pointer. + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS + * + * Driver registers this routine to handle any link service request + * from the nvme_fc transport to a remote nvme-aware port. + * + * Return value : + * 0 - Success + * non-zero: various error codes, in form of -Exxx + **/ +static int +lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport, + void *hosthandle, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + u32 hstate; + + if (!lpfc_nvmet) + return -EINVAL; + + phba = lpfc_nvmet->phba; + if (phba->pport->load_flag & FC_UNLOADING) + return -EINVAL; + + hstate = atomic_read(&lpfc_nvmet->state); + if (hstate == LPFC_NVMET_INV_HOST_ACTIVE) + return -EACCES; + + ndlp = (struct lpfc_nodelist *)hosthandle; + + ret = __lpfc_nvme_ls_req(phba->pport, ndlp, pnvme_lsreq, + lpfc_nvmet_ls_req_cmp); + + return ret; +} + +/** + * lpfc_nvmet_ls_abort - Abort a prior NVME LS request + * @targetport: Transport targetport, that LS was issued from. + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. + * Driver sets this value to the ndlp pointer. + * @pnvme_lsreq - the transport nvme_ls_req structure for LS to be aborted + * + * Driver registers this routine to abort an NVME LS request that is + * in progress (from the transports perspective). + **/ +static void +lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport, + void *hosthandle, + struct nvmefc_ls_req *pnvme_lsreq) +{ + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; + struct lpfc_hba *phba; + struct lpfc_nodelist *ndlp; + int ret; + + phba = lpfc_nvmet->phba; + if (phba->pport->load_flag & FC_UNLOADING) + return; + + ndlp = (struct lpfc_nodelist *)hosthandle; + + ret = __lpfc_nvme_ls_abort(phba->pport, ndlp, pnvme_lsreq); + if (!ret) + atomic_inc(&lpfc_nvmet->xmt_ls_abort); +} + +static void +lpfc_nvmet_host_release(void *hosthandle) +{ + struct lpfc_nodelist *ndlp = hosthandle; + struct lpfc_hba *phba = NULL; + struct lpfc_nvmet_tgtport *tgtp; + + phba = ndlp->phba; + if (!phba->targetport || !phba->targetport->private) + return; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, + "6202 NVMET XPT releasing hosthandle x%px\n", + hosthandle); + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_set(&tgtp->state, 0); +} + static void lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport) { @@ -1214,6 +1416,9 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .fcp_req_release = lpfc_nvmet_xmt_fcp_release, .defer_rcv = lpfc_nvmet_defer_rcv, .discovery_event = lpfc_nvmet_discovery_event, + .ls_req = lpfc_nvmet_ls_req, + .ls_abort = lpfc_nvmet_ls_abort, + .host_release = lpfc_nvmet_host_release, .max_hw_queues = 1, .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, @@ -1224,6 +1429,7 @@ static struct nvmet_fc_target_template lpfc_tgttemplate = { .target_features = 0, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct lpfc_nvmet_tgtport), + .lsrqst_priv_sz = 0, }; static void @@ -1368,7 +1574,7 @@ lpfc_nvmet_setup_io_context(struct lpfc_hba *phba) return -ENOMEM; } ctx_buf->context->ctxbuf = ctx_buf; - ctx_buf->context->state = LPFC_NVMET_STE_FREE; + ctx_buf->context->state = LPFC_NVME_STE_FREE; ctx_buf->iocbq = lpfc_sli_get_iocbq(phba); if (!ctx_buf->iocbq) { @@ -1568,7 +1774,7 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct lpfc_nvmet_tgtport *tgtp; struct nvmefc_tgt_fcp_req *req = NULL; struct lpfc_nodelist *ndlp; @@ -1599,12 +1805,12 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, /* Check if we already received a free context call * and we have completed processing an abort situation. */ - if (ctxp->flag & LPFC_NVMET_CTX_RLS && - !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { + if (ctxp->flag & LPFC_NVME_CTX_RLS && + !(ctxp->flag & LPFC_NVME_ABORT_OP)) { list_del_init(&ctxp->list); released = true; } - ctxp->flag &= ~LPFC_NVMET_XBUSY; + ctxp->flag &= ~LPFC_NVME_XBUSY; spin_unlock(&ctxp->ctxlock); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); @@ -1646,15 +1852,15 @@ lpfc_sli4_nvmet_xri_aborted(struct lpfc_hba *phba, rxid); spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_ABTS_RCV; - ctxp->state = LPFC_NVMET_STE_ABORT; + ctxp->flag |= LPFC_NVME_ABTS_RCV; + ctxp->state = LPFC_NVME_STE_ABORT; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", xri, raw_smp_processor_id(), 0); - req = &ctxp->ctx.fcp_req; + req = &ctxp->hdlrctx.fcp_req; if (req) nvmet_fc_rcv_fcp_abort(phba->targetport, req); } @@ -1667,7 +1873,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_hba *phba = vport->phba; - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; struct nvmefc_tgt_fcp_req *rsp; uint32_t sid; uint16_t oxid, xri; @@ -1690,7 +1896,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, spin_unlock_irqrestore(&phba->hbalock, iflag); spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= LPFC_NVMET_ABTS_RCV; + ctxp->flag |= LPFC_NVME_ABTS_RCV; spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, @@ -1700,7 +1906,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); - rsp = &ctxp->ctx.fcp_req; + rsp = &ctxp->hdlrctx.fcp_req; nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); /* Respond with BA_ACC accordingly */ @@ -1759,7 +1965,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, xri = ctxp->ctxbuf->sglq->sli4_xritag; spin_lock_irqsave(&ctxp->ctxlock, iflag); - ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP); + ctxp->flag |= (LPFC_NVME_ABTS_RCV | LPFC_NVME_ABORT_OP); spin_unlock_irqrestore(&ctxp->ctxlock, iflag); lpfc_nvmeio_data(phba, @@ -1771,10 +1977,10 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, "flag x%x state x%x\n", ctxp->oxid, xri, ctxp->flag, ctxp->state); - if (ctxp->flag & LPFC_NVMET_TNOTIFY) { + if (ctxp->flag & LPFC_NVME_TNOTIFY) { /* Notify the transport */ nvmet_fc_rcv_fcp_abort(phba->targetport, - &ctxp->ctx.fcp_req); + &ctxp->hdlrctx.fcp_req); } else { cancel_work_sync(&ctxp->ctxbuf->defer_work); spin_lock_irqsave(&ctxp->ctxlock, iflag); @@ -1802,7 +2008,7 @@ lpfc_nvmet_rcv_unsol_abort(struct lpfc_vport *vport, static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { struct lpfc_sli_ring *pring; struct lpfc_iocbq *nvmewqeq; @@ -1853,7 +2059,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) struct lpfc_sli_ring *pring; struct lpfc_iocbq *nvmewqeq; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; unsigned long iflags; int rc; @@ -1867,7 +2073,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq, list); spin_unlock_irqrestore(&pring->ring_lock, iflags); - ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2; + ctxp = (struct lpfc_async_xchg_ctx *)nvmewqeq->context2; rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); spin_lock_irqsave(&pring->ring_lock, iflags); if (rc == -EBUSY) { @@ -1879,7 +2085,7 @@ lpfc_nvmet_wqfull_process(struct lpfc_hba *phba, if (rc == WQE_SUCCESS) { #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (ctxp->ts_cmd_nvme) { - if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP) + if (ctxp->hdlrctx.fcp_req.op == NVMET_FCOP_RSP) ctxp->ts_status_wqput = ktime_get_ns(); else ctxp->ts_data_wqput = ktime_get_ns(); @@ -1926,114 +2132,61 @@ lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba) } /** - * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer + * lpfc_nvmet_handle_lsreq - Process an NVME LS request * @phba: pointer to lpfc hba data structure. - * @pring: pointer to a SLI ring. - * @nvmebuf: pointer to lpfc nvme command HBQ data structure. + * @axchg: pointer to exchange context for the NVME LS request * - * This routine is used for processing the WQE associated with a unsolicited - * event. It first determines whether there is an existing ndlp that matches - * the DID from the unsolicited WQE. If not, it will create a new one with - * the DID from the unsolicited WQE. The ELS command from the unsolicited - * WQE is then used to invoke the proper routine and to set up proper state - * of the discovery state machine. - **/ -static void -lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct hbq_dmabuf *nvmebuf) + * This routine is used for processing an asychronously received NVME LS + * request. Any remaining validation is done and the LS is then forwarded + * to the nvmet-fc transport via nvmet_fc_rcv_ls_req(). + * + * The calling sequence should be: nvmet_fc_rcv_ls_req() -> (processing) + * -> lpfc_nvmet_xmt_ls_rsp/cmp -> req->done. + * lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. + * + * Returns 0 if LS was handled and delivered to the transport + * Returns 1 if LS failed to be handled and should be dropped + */ +int +lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *axchg) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_tgtport *tgtp; - struct fc_frame_header *fc_hdr; - struct lpfc_nvmet_rcv_ctx *ctxp; - uint32_t *payload; - uint32_t size, oxid, sid, rc; - - - if (!nvmebuf || !phba->targetport) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6154 LS Drop IO\n"); - oxid = 0; - size = 0; - sid = 0; - ctxp = NULL; - goto dropit; - } - - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); - oxid = be16_to_cpu(fc_hdr->fh_ox_id); - - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - payload = (uint32_t *)(nvmebuf->dbuf.virt); - size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); - sid = sli4_sid_from_fc_hdr(fc_hdr); + struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private; + uint32_t *payload = axchg->payload; + int rc; - ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC); - if (ctxp == NULL) { - atomic_inc(&tgtp->rcv_ls_req_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6155 LS Drop IO x%x: Alloc\n", - oxid); -dropit: - lpfc_nvmeio_data(phba, "NVMET LS DROP: " - "xri x%x sz %d from %06x\n", - oxid, size, sid); - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - return; - } - ctxp->phba = phba; - ctxp->size = size; - ctxp->oxid = oxid; - ctxp->sid = sid; - ctxp->wqeq = NULL; - ctxp->state = LPFC_NVMET_STE_LS_RCV; - ctxp->entry_cnt = 1; - ctxp->rqb_buffer = (void *)nvmebuf; - ctxp->hdwq = &phba->sli4_hba.hdwq[0]; + atomic_inc(&tgtp->rcv_ls_req_in); - lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", - oxid, size, sid); /* - * The calling sequence should be: - * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done - * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. + * Driver passes the ndlp as the hosthandle argument allowing + * the transport to generate LS requests for any associateions + * that are created. */ - atomic_inc(&tgtp->rcv_ls_req_in); - rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req, - payload, size); + rc = nvmet_fc_rcv_ls_req(phba->targetport, axchg->ndlp, &axchg->ls_rsp, + axchg->payload, axchg->size); lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, "6037 NVMET Unsol rcv: sz %d rc %d: %08x %08x %08x " - "%08x %08x %08x\n", size, rc, + "%08x %08x %08x\n", axchg->size, rc, *payload, *(payload+1), *(payload+2), *(payload+3), *(payload+4), *(payload+5)); - if (rc == 0) { + if (!rc) { atomic_inc(&tgtp->rcv_ls_req_out); - return; + return 0; } - lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n", - oxid, size, sid); - atomic_inc(&tgtp->rcv_ls_req_drop); - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", - ctxp->oxid, rc); - - /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - - atomic_inc(&tgtp->xmt_ls_abort); - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); #endif + return 1; } static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) { #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; struct lpfc_hba *phba = ctxp->phba; struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; struct lpfc_nvmet_tgtport *tgtp; @@ -2054,7 +2207,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) return; } - if (ctxp->flag & LPFC_NVMET_ABTS_RCV) { + if (ctxp->flag & LPFC_NVME_ABTS_RCV) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6324 IO oxid x%x aborted\n", ctxp->oxid); @@ -2063,7 +2216,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) payload = (uint32_t *)(nvmebuf->dbuf.virt); tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - ctxp->flag |= LPFC_NVMET_TNOTIFY; + ctxp->flag |= LPFC_NVME_TNOTIFY; #ifdef CONFIG_SCSI_LPFC_DEBUG_FS if (ctxp->ts_isr_cmd) ctxp->ts_cmd_nvme = ktime_get_ns(); @@ -2077,13 +2230,13 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) * A buffer has already been reposted for this IO, so just free * the nvmebuf. */ - rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->hdlrctx.fcp_req, payload, ctxp->size); /* Process FCP command */ if (rc == 0) { atomic_inc(&tgtp->rcv_fcp_cmd_out); spin_lock_irqsave(&ctxp->ctxlock, iflags); - if ((ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) || + if ((ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) || (nvmebuf != ctxp->rqb_buffer)) { spin_unlock_irqrestore(&ctxp->ctxlock, iflags); return; @@ -2102,7 +2255,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) atomic_inc(&tgtp->rcv_fcp_cmd_out); atomic_inc(&tgtp->defer_fod); spin_lock_irqsave(&ctxp->ctxlock, iflags); - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { spin_unlock_irqrestore(&ctxp->ctxlock, iflags); return; } @@ -2117,7 +2270,7 @@ lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); return; } - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; + ctxp->flag &= ~LPFC_NVME_TNOTIFY; atomic_inc(&tgtp->rcv_fcp_cmd_drop); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", @@ -2224,7 +2377,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, uint64_t isr_timestamp, uint8_t cqflag) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; struct fc_frame_header *fc_hdr; struct lpfc_nvmet_ctxbuf *ctx_buf; @@ -2306,11 +2459,11 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, sid = sli4_sid_from_fc_hdr(fc_hdr); - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; + ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context; spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list); spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); - if (ctxp->state != LPFC_NVMET_STE_FREE) { + if (ctxp->state != LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6414 NVMET Context corrupt %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); @@ -2322,7 +2475,7 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, ctxp->oxid = oxid; ctxp->sid = sid; ctxp->idx = idx; - ctxp->state = LPFC_NVMET_STE_RCV; + ctxp->state = LPFC_NVME_STE_RCV; ctxp->entry_cnt = 1; ctxp->flag = 0; ctxp->ctxbuf = ctx_buf; @@ -2369,40 +2522,6 @@ lpfc_nvmet_unsol_fcp_buffer(struct lpfc_hba *phba, } /** - * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport - * @phba: pointer to lpfc hba data structure. - * @pring: pointer to a SLI ring. - * @nvmebuf: pointer to received nvme data structure. - * - * This routine is used to process an unsolicited event received from a SLI - * (Service Level Interface) ring. The actual processing of the data buffer - * associated with the unsolicited event is done by invoking the routine - * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the - * SLI RQ on which the unsolicited event was received. - **/ -void -lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, - struct lpfc_iocbq *piocb) -{ - struct lpfc_dmabuf *d_buf; - struct hbq_dmabuf *nvmebuf; - - d_buf = piocb->context2; - nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); - - if (!nvmebuf) { - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, - "3015 LS Drop IO\n"); - return; - } - if (phba->nvmet_support == 0) { - lpfc_in_buf_free(phba, &nvmebuf->dbuf); - return; - } - lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf); -} - -/** * lpfc_nvmet_unsol_fcp_event - Process an unsolicited event from an nvme nport * @phba: pointer to lpfc hba data structure. * @idx: relative index of MRQ vector @@ -2462,7 +2581,7 @@ lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, **/ static struct lpfc_iocbq * lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, dma_addr_t rspbuf, uint16_t rspsize) { struct lpfc_nodelist *ndlp; @@ -2584,9 +2703,9 @@ nvme_wqe_free_wqeq_exit: static struct lpfc_iocbq * lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp) + struct lpfc_async_xchg_ctx *ctxp) { - struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req; + struct nvmefc_tgt_fcp_req *rsp = &ctxp->hdlrctx.fcp_req; struct lpfc_nvmet_tgtport *tgtp; struct sli4_sge *sgl; struct lpfc_nodelist *ndlp; @@ -2647,9 +2766,9 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, } /* Sanity check */ - if (((ctxp->state == LPFC_NVMET_STE_RCV) && + if (((ctxp->state == LPFC_NVME_STE_RCV) && (ctxp->entry_cnt == 1)) || - (ctxp->state == LPFC_NVMET_STE_DATA)) { + (ctxp->state == LPFC_NVME_STE_DATA)) { wqe = &nvmewqe->wqe; } else { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, @@ -2912,7 +3031,7 @@ lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, sgl++; ctxp->offset += cnt; } - ctxp->state = LPFC_NVMET_STE_DATA; + ctxp->state = LPFC_NVME_STE_DATA; ctxp->entry_cnt++; return nvmewqe; } @@ -2931,7 +3050,7 @@ static void lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t result; unsigned long flags; @@ -2941,23 +3060,23 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, result = wcqe->parameter; tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - if (ctxp->flag & LPFC_NVMET_ABORT_OP) + if (ctxp->flag & LPFC_NVME_ABORT_OP) atomic_inc(&tgtp->xmt_fcp_abort_cmpl); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->state = LPFC_NVMET_STE_DONE; + ctxp->state = LPFC_NVME_STE_DONE; /* Check if we already received a free context call * and we have completed processing an abort situation. */ - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && - !(ctxp->flag & LPFC_NVMET_XBUSY)) { + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && + !(ctxp->flag & LPFC_NVME_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp); @@ -2981,7 +3100,7 @@ lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_sli_release_iocbq(phba, cmdwqe); /* Since iaab/iaar are NOT set, there is no work left. - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted * should have been called already. */ } @@ -3000,7 +3119,7 @@ static void lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; unsigned long flags; uint32_t result; @@ -3020,11 +3139,11 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_ABORT_OP) + if (ctxp->flag & LPFC_NVME_ABORT_OP) atomic_inc(&tgtp->xmt_fcp_abort_cmpl); /* Sanity check */ - if (ctxp->state != LPFC_NVMET_STE_ABORT) { + if (ctxp->state != LPFC_NVME_STE_ABORT) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6112 ABTS Wrong state:%d oxid x%x\n", ctxp->state, ctxp->oxid); @@ -3033,15 +3152,15 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, /* Check if we already received a free context call * and we have completed processing an abort situation. */ - ctxp->state = LPFC_NVMET_STE_DONE; - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && - !(ctxp->flag & LPFC_NVMET_XBUSY)) { + ctxp->state = LPFC_NVME_STE_DONE; + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && + !(ctxp->flag & LPFC_NVME_XBUSY)) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp); @@ -3062,7 +3181,7 @@ lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); /* Since iaab/iaar are NOT set, there is no work left. - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted * should have been called already. */ } @@ -3081,15 +3200,17 @@ static void lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe) { - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_nvmet_tgtport *tgtp; uint32_t result; ctxp = cmdwqe->context2; result = wcqe->parameter; - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; - atomic_inc(&tgtp->xmt_ls_abort_cmpl); + if (phba->nvmet_support) { + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_inc(&tgtp->xmt_ls_abort_cmpl); + } lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, "6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n", @@ -3107,7 +3228,7 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, return; } - if (ctxp->state != LPFC_NVMET_STE_LS_ABORT) { + if (ctxp->state != LPFC_NVME_STE_LS_ABORT) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6416 NVMET LS abort cmpl state mismatch: " "oxid x%x: %d %d\n", @@ -3122,10 +3243,10 @@ lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, static int lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { - struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_tgtport *tgtp = NULL; struct lpfc_iocbq *abts_wqeq; union lpfc_wqe128 *wqe_abts; struct lpfc_nodelist *ndlp; @@ -3134,13 +3255,15 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, "6067 ABTS: sid %x xri x%x/x%x\n", sid, xri, ctxp->wqeq->sli4_xritag); - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (phba->nvmet_support && phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; ndlp = lpfc_findnode_did(phba->pport, sid); if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { - atomic_inc(&tgtp->xmt_abort_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_rsp_error); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6134 Drop ABTS - wrong NDLP state x%x.\n", (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); @@ -3217,7 +3340,7 @@ lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; @@ -3244,7 +3367,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, /* No failure to an ABTS request. */ spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3258,13 +3381,13 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, "6161 ABORT failed: No wqeqs: " "xri: x%x\n", ctxp->oxid); /* No failure to an ABTS request. */ - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } abts_wqeq = ctxp->abort_wqeq; - ctxp->state = LPFC_NVMET_STE_ABORT; - opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0; + ctxp->state = LPFC_NVME_STE_ABORT; + opt = (ctxp->flag & LPFC_NVME_ABTS_RCV) ? INHIBIT_ABORT : 0; spin_unlock_irqrestore(&ctxp->ctxlock, flags); /* Announce entry to new IO submit field. */ @@ -3287,7 +3410,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, phba->hba_flag, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3302,7 +3425,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->oxid); lpfc_sli_release_iocbq(phba, abts_wqeq); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); return 0; } @@ -3331,7 +3454,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, atomic_inc(&tgtp->xmt_abort_rsp_error); spin_lock_irqsave(&ctxp->ctxlock, flags); - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; + ctxp->flag &= ~LPFC_NVME_ABORT_OP; spin_unlock_irqrestore(&ctxp->ctxlock, flags); lpfc_sli_release_iocbq(phba, abts_wqeq); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, @@ -3343,7 +3466,7 @@ lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { struct lpfc_nvmet_tgtport *tgtp; @@ -3358,14 +3481,14 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, ctxp->wqeq->hba_wqidx = 0; } - if (ctxp->state == LPFC_NVMET_STE_FREE) { + if (ctxp->state == LPFC_NVME_STE_FREE) { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6417 NVMET ABORT ctx freed %d %d oxid x%x\n", ctxp->state, ctxp->entry_cnt, ctxp->oxid); rc = WQE_BUSY; goto aerr; } - ctxp->state = LPFC_NVMET_STE_ABORT; + ctxp->state = LPFC_NVME_STE_ABORT; ctxp->entry_cnt++; rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); if (rc == 0) @@ -3387,13 +3510,13 @@ lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, aerr: spin_lock_irqsave(&ctxp->ctxlock, flags); - if (ctxp->flag & LPFC_NVMET_CTX_RLS) { + if (ctxp->flag & LPFC_NVME_CTX_RLS) { spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); list_del_init(&ctxp->list); spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); released = true; } - ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS); + ctxp->flag &= ~(LPFC_NVME_ABORT_OP | LPFC_NVME_CTX_RLS); spin_unlock_irqrestore(&ctxp->ctxlock, flags); atomic_inc(&tgtp->xmt_abort_rsp_error); @@ -3406,29 +3529,39 @@ aerr: return 1; } -static int -lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, - struct lpfc_nvmet_rcv_ctx *ctxp, +/** + * lpfc_nvme_unsol_ls_issue_abort - issue ABTS on an exchange received + * via async frame receive where the frame is not handled. + * @phba: pointer to adapter structure + * @ctxp: pointer to the asynchronously received received sequence + * @sid: address of the remote port to send the ABTS to + * @xri: oxid value to for the ABTS (other side's exchange id). + **/ +int +lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, uint16_t xri) { - struct lpfc_nvmet_tgtport *tgtp; + struct lpfc_nvmet_tgtport *tgtp = NULL; struct lpfc_iocbq *abts_wqeq; unsigned long flags; int rc; - if ((ctxp->state == LPFC_NVMET_STE_LS_RCV && ctxp->entry_cnt == 1) || - (ctxp->state == LPFC_NVMET_STE_LS_RSP && ctxp->entry_cnt == 2)) { - ctxp->state = LPFC_NVMET_STE_LS_ABORT; + if ((ctxp->state == LPFC_NVME_STE_LS_RCV && ctxp->entry_cnt == 1) || + (ctxp->state == LPFC_NVME_STE_LS_RSP && ctxp->entry_cnt == 2)) { + ctxp->state = LPFC_NVME_STE_LS_ABORT; ctxp->entry_cnt++; } else { lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, "6418 NVMET LS abort state mismatch " "IO x%x: %d %d\n", ctxp->oxid, ctxp->state, ctxp->entry_cnt); - ctxp->state = LPFC_NVMET_STE_LS_ABORT; + ctxp->state = LPFC_NVME_STE_LS_ABORT; } - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (phba->nvmet_support && phba->targetport) + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + if (!ctxp->wqeq) { /* Issue ABTS for this WQE based on iotag */ ctxp->wqeq = lpfc_sli_get_iocbq(phba); @@ -3455,16 +3588,44 @@ lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq); spin_unlock_irqrestore(&phba->hbalock, flags); if (rc == WQE_SUCCESS) { - atomic_inc(&tgtp->xmt_abort_unsol); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_unsol); return 0; } out: - atomic_inc(&tgtp->xmt_abort_rsp_error); + if (tgtp) + atomic_inc(&tgtp->xmt_abort_rsp_error); abts_wqeq->context2 = NULL; abts_wqeq->context3 = NULL; lpfc_sli_release_iocbq(phba, abts_wqeq); - kfree(ctxp); lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, "6056 Failed to Issue ABTS. Status x%x\n", rc); - return 0; + return 1; +} + +/** + * lpfc_nvmet_invalidate_host + * + * @phba - pointer to the driver instance bound to an adapter port. + * @ndlp - pointer to an lpfc_nodelist type + * + * This routine upcalls the nvmet transport to invalidate an NVME + * host to which this target instance had active connections. + */ +void +lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) +{ + struct lpfc_nvmet_tgtport *tgtp; + + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS, + "6203 Invalidating hosthandle x%px\n", + ndlp); + + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; + atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE); + +#if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) + /* Need to get the nvmet_fc_target_port pointer here.*/ + nvmet_fc_invalidate_host(phba->targetport, ndlp); +#endif } diff --git a/drivers/scsi/lpfc/lpfc_nvmet.h b/drivers/scsi/lpfc/lpfc_nvmet.h deleted file mode 100644 index b80b1639b9a7..000000000000 --- a/drivers/scsi/lpfc/lpfc_nvmet.h +++ /dev/null @@ -1,158 +0,0 @@ -/******************************************************************* - * This file is part of the Emulex Linux Device Driver for * - * Fibre Channel Host Bus Adapters. * - * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term * - * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * - * Copyright (C) 2004-2016 Emulex. All rights reserved. * - * EMULEX and SLI are trademarks of Emulex. * - * www.broadcom.com * - * Portions Copyright (C) 2004-2005 Christoph Hellwig * - * * - * This program is free software; you can redistribute it and/or * - * modify it under the terms of version 2 of the GNU General * - * Public License as published by the Free Software Foundation. * - * This program is distributed in the hope that it will be useful. * - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * - * TO BE LEGALLY INVALID. See the GNU General Public License for * - * more details, a copy of which can be found in the file COPYING * - * included with this package. * - ********************************************************************/ - -#define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ -#define LPFC_NVMET_RQE_MIN_POST 128 -#define LPFC_NVMET_RQE_DEF_POST 512 -#define LPFC_NVMET_RQE_DEF_COUNT 2048 -#define LPFC_NVMET_SUCCESS_LEN 12 - -#define LPFC_NVMET_MRQ_AUTO 0 -#define LPFC_NVMET_MRQ_MAX 16 - -#define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) - -/* Used for NVME Target */ -struct lpfc_nvmet_tgtport { - struct lpfc_hba *phba; - struct completion *tport_unreg_cmp; - - /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ - atomic_t rcv_ls_req_in; - atomic_t rcv_ls_req_out; - atomic_t rcv_ls_req_drop; - atomic_t xmt_ls_abort; - atomic_t xmt_ls_abort_cmpl; - - /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ - atomic_t xmt_ls_rsp; - atomic_t xmt_ls_drop; - - /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ - atomic_t xmt_ls_rsp_error; - atomic_t xmt_ls_rsp_aborted; - atomic_t xmt_ls_rsp_xb_set; - atomic_t xmt_ls_rsp_cmpl; - - /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ - atomic_t rcv_fcp_cmd_in; - atomic_t rcv_fcp_cmd_out; - atomic_t rcv_fcp_cmd_drop; - atomic_t rcv_fcp_cmd_defer; - atomic_t xmt_fcp_release; - - /* Stats counters - lpfc_nvmet_xmt_fcp_op */ - atomic_t xmt_fcp_drop; - atomic_t xmt_fcp_read_rsp; - atomic_t xmt_fcp_read; - atomic_t xmt_fcp_write; - atomic_t xmt_fcp_rsp; - - /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ - atomic_t xmt_fcp_rsp_xb_set; - atomic_t xmt_fcp_rsp_cmpl; - atomic_t xmt_fcp_rsp_error; - atomic_t xmt_fcp_rsp_aborted; - atomic_t xmt_fcp_rsp_drop; - - /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ - atomic_t xmt_fcp_xri_abort_cqe; - atomic_t xmt_fcp_abort; - atomic_t xmt_fcp_abort_cmpl; - atomic_t xmt_abort_sol; - atomic_t xmt_abort_unsol; - atomic_t xmt_abort_rsp; - atomic_t xmt_abort_rsp_error; - - /* Stats counters - defer IO */ - atomic_t defer_ctx; - atomic_t defer_fod; - atomic_t defer_wqfull; -}; - -struct lpfc_nvmet_ctx_info { - struct list_head nvmet_ctx_list; - spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ - struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; - struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; - uint16_t nvmet_ctx_list_cnt; - char pad[16]; /* pad to a cache-line */ -}; - -/* This retrieves the context info associated with the specified cpu / mrq */ -#define lpfc_get_ctx_list(phba, cpu, mrq) \ - (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) - -struct lpfc_nvmet_rcv_ctx { - union { - struct nvmefc_tgt_ls_req ls_req; - struct nvmefc_tgt_fcp_req fcp_req; - } ctx; - struct list_head list; - struct lpfc_hba *phba; - struct lpfc_iocbq *wqeq; - struct lpfc_iocbq *abort_wqeq; - spinlock_t ctxlock; /* protect flag access */ - uint32_t sid; - uint32_t offset; - uint16_t oxid; - uint16_t size; - uint16_t entry_cnt; - uint16_t cpu; - uint16_t idx; - uint16_t state; - /* States */ -#define LPFC_NVMET_STE_LS_RCV 1 -#define LPFC_NVMET_STE_LS_ABORT 2 -#define LPFC_NVMET_STE_LS_RSP 3 -#define LPFC_NVMET_STE_RCV 4 -#define LPFC_NVMET_STE_DATA 5 -#define LPFC_NVMET_STE_ABORT 6 -#define LPFC_NVMET_STE_DONE 7 -#define LPFC_NVMET_STE_FREE 0xff - uint16_t flag; -#define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ -#define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ -#define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ -#define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ -#define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ -#define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ -#define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ -#define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ - struct rqb_dmabuf *rqb_buffer; - struct lpfc_nvmet_ctxbuf *ctxbuf; - struct lpfc_sli4_hdw_queue *hdwq; - -#ifdef CONFIG_SCSI_LPFC_DEBUG_FS - uint64_t ts_isr_cmd; - uint64_t ts_cmd_nvme; - uint64_t ts_nvme_data; - uint64_t ts_data_wqput; - uint64_t ts_isr_data; - uint64_t ts_data_nvme; - uint64_t ts_nvme_status; - uint64_t ts_status_wqput; - uint64_t ts_isr_status; - uint64_t ts_status_nvme; -#endif -}; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index b6fb665e6ec4..9e21c4f3b009 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -39,8 +39,6 @@ #include <asm/set_memory.h> #endif -#include <linux/nvme-fc-driver.h> - #include "lpfc_hw4.h" #include "lpfc_hw.h" #include "lpfc_sli.h" @@ -50,7 +48,6 @@ #include "lpfc.h" #include "lpfc_scsi.h" #include "lpfc_nvme.h" -#include "lpfc_nvmet.h" #include "lpfc_crtn.h" #include "lpfc_logmsg.h" #include "lpfc_compat.h" @@ -2796,6 +2793,123 @@ lpfc_sli_get_buff(struct lpfc_hba *phba, } /** + * lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer + * containing a NVME LS request. + * @phba: pointer to lpfc hba data structure. + * @piocb: pointer to the iocbq struct representing the sequence starting + * frame. + * + * This routine initially validates the NVME LS, validates there is a login + * with the port that sent the LS, and then calls the appropriate nvme host + * or target LS request handler. + **/ +static void +lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) +{ + struct lpfc_nodelist *ndlp; + struct lpfc_dmabuf *d_buf; + struct hbq_dmabuf *nvmebuf; + struct fc_frame_header *fc_hdr; + struct lpfc_async_xchg_ctx *axchg = NULL; + char *failwhy = NULL; + uint32_t oxid, sid, did, fctl, size; + int ret = 1; + + d_buf = piocb->context2; + + nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); + fc_hdr = nvmebuf->hbuf.virt; + oxid = be16_to_cpu(fc_hdr->fh_ox_id); + sid = sli4_sid_from_fc_hdr(fc_hdr); + did = sli4_did_from_fc_hdr(fc_hdr); + fctl = (fc_hdr->fh_f_ctl[0] << 16 | + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); + size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); + + lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n", + oxid, size, sid); + + if (phba->pport->load_flag & FC_UNLOADING) { + failwhy = "Driver Unloading"; + } else if (!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { + failwhy = "NVME FC4 Disabled"; + } else if (!phba->nvmet_support && !phba->pport->localport) { + failwhy = "No Localport"; + } else if (phba->nvmet_support && !phba->targetport) { + failwhy = "No Targetport"; + } else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) { + failwhy = "Bad NVME LS R_CTL"; + } else if (unlikely((fctl & 0x00FF0000) != + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) { + failwhy = "Bad NVME LS F_CTL"; + } else { + axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC); + if (!axchg) + failwhy = "No CTX memory"; + } + + if (unlikely(failwhy)) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6154 Drop NVME LS: SID %06X OXID x%X: %s\n", + sid, oxid, failwhy); + goto out_fail; + } + + /* validate the source of the LS is logged in */ + ndlp = lpfc_findnode_did(phba->pport, sid); + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, + "6216 NVME Unsol rcv: No ndlp: " + "NPort_ID x%x oxid x%x\n", + sid, oxid); + goto out_fail; + } + + axchg->phba = phba; + axchg->ndlp = ndlp; + axchg->size = size; + axchg->oxid = oxid; + axchg->sid = sid; + axchg->wqeq = NULL; + axchg->state = LPFC_NVME_STE_LS_RCV; + axchg->entry_cnt = 1; + axchg->rqb_buffer = (void *)nvmebuf; + axchg->hdwq = &phba->sli4_hba.hdwq[0]; + axchg->payload = nvmebuf->dbuf.virt; + INIT_LIST_HEAD(&axchg->list); + + if (phba->nvmet_support) + ret = lpfc_nvmet_handle_lsreq(phba, axchg); + else + ret = lpfc_nvme_handle_lsreq(phba, axchg); + + /* if zero, LS was successfully handled. If non-zero, LS not handled */ + if (!ret) + return; + + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, + "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X " + "NVMe%s handler failed %d\n", + did, sid, oxid, + (phba->nvmet_support) ? "T" : "I", ret); + +out_fail: + + /* recycle receive buffer */ + lpfc_in_buf_free(phba, &nvmebuf->dbuf); + + /* If start of new exchange, abort it */ + if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX))) + ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid); + + if (ret) + kfree(axchg); +} + +/** * lpfc_complete_unsol_iocb - Complete an unsolicited sequence * @phba: Pointer to HBA context object. * @pring: Pointer to driver SLI ring object. @@ -2816,7 +2930,7 @@ lpfc_complete_unsol_iocb(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, switch (fch_type) { case FC_TYPE_NVME: - lpfc_nvmet_unsol_ls_event(phba, pring, saveq); + lpfc_nvme_unsol_ls_handler(phba, saveq); return 1; default: break; @@ -13981,8 +14095,8 @@ lpfc_sli4_nvmet_handle_rcqe(struct lpfc_hba *phba, struct lpfc_queue *cq, /* Just some basic sanity checks on FCP Command frame */ fctl = (fc_hdr->fh_f_ctl[0] << 16 | - fc_hdr->fh_f_ctl[1] << 8 | - fc_hdr->fh_f_ctl[2]); + fc_hdr->fh_f_ctl[1] << 8 | + fc_hdr->fh_f_ctl[2]); if (((fctl & (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) != (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) || @@ -19891,7 +20005,7 @@ lpfc_sli4_issue_wqe(struct lpfc_hba *phba, struct lpfc_sli4_hdw_queue *qp, struct lpfc_iocbq *pwqe) { union lpfc_wqe128 *wqe = &pwqe->wqe; - struct lpfc_nvmet_rcv_ctx *ctxp; + struct lpfc_async_xchg_ctx *ctxp; struct lpfc_queue *wq; struct lpfc_sglq *sglq; struct lpfc_sli_ring *pring; |